Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 45 additions & 5 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2326,12 +2326,52 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,

static mlir::omp::ScanOp
genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
semantics::SemanticsContext &semaCtx, mlir::Location loc,
const ConstructQueue &queue, ConstructQueue::const_iterator item) {
semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
mlir::Location loc, const ConstructQueue &queue,
ConstructQueue::const_iterator item) {
mlir::omp::ScanOperands clauseOps;
genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps);
return mlir::omp::ScanOp::create(converter.getFirOpBuilder(),
converter.getCurrentLocation(), clauseOps);
mlir::omp::ScanOp scanOp = mlir::omp::ScanOp::create(
converter.getFirOpBuilder(), converter.getCurrentLocation(), clauseOps);

/// Scan redution is not implemented with nested workshare loops, linear
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Scan redution is not implemented with nested workshare loops, linear
/// Scan reduction is not implemented with nested workshare loops, linear

/// clause, tiling
mlir::omp::LoopNestOp loopNestOp =
scanOp->getParentOfType<mlir::omp::LoopNestOp>();
mlir::omp::WsloopOp wsLoopOp = scanOp->getParentOfType<mlir::omp::WsloopOp>();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure that this is exactly what we want to get here. As per the spec, scan can be attached to a simd, worksharing-loop or composite combining them. That means that what we probably want is something like:

Suggested change
mlir::omp::WsloopOp wsLoopOp = scanOp->getParentOfType<mlir::omp::WsloopOp>();
llvm::SmallVector<mlir::omp::LoopWrapperInterface> loopWrappers;
loopNestOp.gatherWrappers(loopWrappers);
mlir::Operation *loopWrapperOp = loopWrappers.front()->getOperation();
// if (llvm::isa<mlir::omp::SimdOp>(loopWrapperOp)) TODO(loc, "unsupported simd?");
// if (loopWrappers.size() > 1) TODO(loc, "unsupported composite?");
auto wsloopOp = llvm::cast<mlir::omp::WsloopOp>(loopWrapperOp);

Also, without that change or something similar, we would base checks below on the wrong loop wrapper in a case like this:

omp.wsloop ... {
  omp.loop_nest ... {
    ...
    omp.simd ... {
      omp.loop_nest ... {
        // scan here: wsLoopOp would be the one wrapping the parent loop, rather than this one.
        omp.yield
      }
    }
    ...
    omp.yield
  }
}

bool isNested =
(loopNestOp.getNumLoops() > 1) ||
(wsLoopOp && (wsLoopOp->getParentOfType<mlir::omp::WsloopOp>()));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to guard against this case? Doesn't the scan directive just apply to the innermost loop in the case of something like this?

!$omp do
do i=1,10
  !$omp parallel do reduction(inscan,+: x)
  do j=1,10
    x = x + 1
    !$omp scan inclusive(x)
    v(i, j) = x
  end do
end do

That would be the sort of case where wsloopOp would have an omp.wsloop parent. In MLIR:

omp.wsloop {
  omp.loop_nest %i... {
    omp.parallel {
      // Below would be the wsloopOp picked up
      omp.wsloop reduction(...) {
        omp.loop_nest %j... {
          // Loop body with omp.scan here...
          omp.yield
        }
      }
      omp.terminator
    }
    omp.yield
  }
}

I get that handling collapsed loops (e.g. loopNestOp.getNumLoops() > 1) would have to be somewhat different, as scan would have to work across all of them, but I guess I don't quite see the need to also disallow regular nested loops.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The outer workshare loop can also have scan directive specified. The example specified does not compute right results. Thats why I have mentioned it as a TODO. Possibly something is going wrong with the temporary buffer allocation. Need to dig deeper into it

if (isNested)
TODO(loc, "Scan directive inside nested workshare loops");
if (wsLoopOp && !wsLoopOp.getLinearVars().empty())
TODO(loc, "Scan directive with linear clause");
if (loopNestOp.getTileSizes())
TODO(loc, "Scan directive with loop tiling");

// All loop indices should be loaded after the scan construct as otherwise,
// it would result in using the index variable across scan directive.
// (`Intra-iteration dependences from a statement in the structured
// block sequence that precede a scan directive to a statement in the
// structured block sequence that follows a scan directive must not exist,
// except for dependences for the list items specified in an inclusive or
// exclusive clause.`).
// TODO: Nested loops are not handled.
mlir::Region &region = loopNestOp->getRegion(0);
mlir::Value indexVal = fir::getBase(region.getArgument(0));
lower::pft::Evaluation *doConstructEval = eval.parentConstruct;
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation();
auto *doStmt = doLoop->getIf<parser::NonLabelDoStmt>();
assert(doStmt && "Expected do loop to be in the nested evaluation");
const auto &loopControl =
std::get<std::optional<parser::LoopControl>>(doStmt->t);
const parser::LoopControl::Bounds *bounds =
std::get_if<parser::LoopControl::Bounds>(&loopControl->u);
mlir::Operation *storeOp =
setLoopVar(converter, loc, indexVal, bounds->name.thing.symbol);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not quite sure whether we should use createAndSetPrivatizedLoopVar or setLoopVar in this case, perhaps @Meinersbur can help out figuring out when we can actually assert that the symbol is present in the symbol table regarding induction variables.

firOpBuilder.setInsertionPointAfter(storeOp);
return scanOp;
}

static mlir::omp::SectionsOp
Expand Down Expand Up @@ -3416,7 +3456,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
loc, queue, item);
break;
case llvm::omp::Directive::OMPD_scan:
newOp = genScanOp(converter, symTable, semaCtx, loc, queue, item);
newOp = genScanOp(converter, symTable, semaCtx, eval, loc, queue, item);
break;
case llvm::omp::Directive::OMPD_section:
llvm_unreachable("genOMPDispatch: OMPD_section");
Expand Down
34 changes: 34 additions & 0 deletions flang/test/Lower/OpenMP/Todo/nested-wsloop-scan.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
! Tests scan reduction behavior when used in nested workshare loops

! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s

program nested_scan_example
implicit none
integer, parameter :: n = 4, m = 5
integer :: a(n, m), b(n, m)
integer :: i, j
integer :: row_sum, col_sum

do i = 1, n
do j = 1, m
a(i, j) = i + j
end do
end do

!$omp parallel do reduction(inscan, +: row_sum) private(col_sum, j)
do i = 1, n
row_sum = row_sum + i
!$omp scan inclusive(row_sum)

col_sum = 0
!$omp parallel do reduction(inscan, +: col_sum)
do j = 1, m
col_sum = col_sum + a(i, j)
!CHECK: not yet implemented: Scan directive inside nested workshare loops
!$omp scan inclusive(col_sum)
b(i, j) = col_sum + row_sum
end do
!$omp end parallel do
end do
!$omp end parallel do
end program nested_scan_example
29 changes: 29 additions & 0 deletions flang/test/Lower/OpenMP/Todo/wsloop-scan-collapse.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
! Tests scan reduction behavior when used in nested workshare loops

! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s

program nested_loop_example
implicit none
integer :: i, j, x
integer, parameter :: N = 100, M = 200
real :: A(N, M), B(N, M)
x = 0

do i = 1, N
do j = 1, M
A(i, j) = i * j
end do
end do

!$omp parallel do collapse(2) reduction(inscan, +:x)
do i = 1, N
do j = 1, M
x = x + A(i,j)
!CHECK: not yet implemented: Scan directive inside nested workshare loops
!$omp scan inclusive(x)
B(i, j) = x
end do
end do
!$omp end parallel do

end program nested_loop_example
Loading
Loading