-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[MLIR][OpenMP] Add scan reduction lowering to llvm #167031
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2326,12 +2326,52 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable, | |||||||||||||||
|
|
||||||||||||||||
| static mlir::omp::ScanOp | ||||||||||||||||
| genScanOp(lower::AbstractConverter &converter, lower::SymMap &symTable, | ||||||||||||||||
| semantics::SemanticsContext &semaCtx, mlir::Location loc, | ||||||||||||||||
| const ConstructQueue &queue, ConstructQueue::const_iterator item) { | ||||||||||||||||
| semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, | ||||||||||||||||
| mlir::Location loc, const ConstructQueue &queue, | ||||||||||||||||
| ConstructQueue::const_iterator item) { | ||||||||||||||||
| mlir::omp::ScanOperands clauseOps; | ||||||||||||||||
| genScanClauses(converter, semaCtx, item->clauses, loc, clauseOps); | ||||||||||||||||
| return mlir::omp::ScanOp::create(converter.getFirOpBuilder(), | ||||||||||||||||
| converter.getCurrentLocation(), clauseOps); | ||||||||||||||||
| mlir::omp::ScanOp scanOp = mlir::omp::ScanOp::create( | ||||||||||||||||
| converter.getFirOpBuilder(), converter.getCurrentLocation(), clauseOps); | ||||||||||||||||
|
|
||||||||||||||||
| /// Scan redution is not implemented with nested workshare loops, linear | ||||||||||||||||
| /// clause, tiling | ||||||||||||||||
| mlir::omp::LoopNestOp loopNestOp = | ||||||||||||||||
| scanOp->getParentOfType<mlir::omp::LoopNestOp>(); | ||||||||||||||||
| mlir::omp::WsloopOp wsLoopOp = scanOp->getParentOfType<mlir::omp::WsloopOp>(); | ||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure that this is exactly what we want to get here. As per the spec,
Suggested change
Also, without that change or something similar, we would base checks below on the wrong loop wrapper in a case like this: omp.wsloop ... {
omp.loop_nest ... {
...
omp.simd ... {
omp.loop_nest ... {
// scan here: wsLoopOp would be the one wrapping the parent loop, rather than this one.
omp.yield
}
}
...
omp.yield
}
} |
||||||||||||||||
| bool isNested = | ||||||||||||||||
| (loopNestOp.getNumLoops() > 1) || | ||||||||||||||||
| (wsLoopOp && (wsLoopOp->getParentOfType<mlir::omp::WsloopOp>())); | ||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to guard against this case? Doesn't the !$omp do
do i=1,10
!$omp parallel do reduction(inscan,+: x)
do j=1,10
x = x + 1
!$omp scan inclusive(x)
v(i, j) = x
end do
end doThat would be the sort of case where omp.wsloop {
omp.loop_nest %i... {
omp.parallel {
// Below would be the wsloopOp picked up
omp.wsloop reduction(...) {
omp.loop_nest %j... {
// Loop body with omp.scan here...
omp.yield
}
}
omp.terminator
}
omp.yield
}
}I get that handling collapsed loops (e.g.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The outer workshare loop can also have scan directive specified. The example specified does not compute right results. Thats why I have mentioned it as a TODO. Possibly something is going wrong with the temporary buffer allocation. Need to dig deeper into it |
||||||||||||||||
| if (isNested) | ||||||||||||||||
| TODO(loc, "Scan directive inside nested workshare loops"); | ||||||||||||||||
| if (wsLoopOp && !wsLoopOp.getLinearVars().empty()) | ||||||||||||||||
| TODO(loc, "Scan directive with linear clause"); | ||||||||||||||||
| if (loopNestOp.getTileSizes()) | ||||||||||||||||
| TODO(loc, "Scan directive with loop tiling"); | ||||||||||||||||
tblah marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||
|
|
||||||||||||||||
| // All loop indices should be loaded after the scan construct as otherwise, | ||||||||||||||||
| // it would result in using the index variable across scan directive. | ||||||||||||||||
| // (`Intra-iteration dependences from a statement in the structured | ||||||||||||||||
| // block sequence that precede a scan directive to a statement in the | ||||||||||||||||
| // structured block sequence that follows a scan directive must not exist, | ||||||||||||||||
| // except for dependences for the list items specified in an inclusive or | ||||||||||||||||
| // exclusive clause.`). | ||||||||||||||||
| // TODO: Nested loops are not handled. | ||||||||||||||||
| mlir::Region ®ion = loopNestOp->getRegion(0); | ||||||||||||||||
| mlir::Value indexVal = fir::getBase(region.getArgument(0)); | ||||||||||||||||
| lower::pft::Evaluation *doConstructEval = eval.parentConstruct; | ||||||||||||||||
| fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); | ||||||||||||||||
| lower::pft::Evaluation *doLoop = &doConstructEval->getFirstNestedEvaluation(); | ||||||||||||||||
| auto *doStmt = doLoop->getIf<parser::NonLabelDoStmt>(); | ||||||||||||||||
| assert(doStmt && "Expected do loop to be in the nested evaluation"); | ||||||||||||||||
| const auto &loopControl = | ||||||||||||||||
| std::get<std::optional<parser::LoopControl>>(doStmt->t); | ||||||||||||||||
| const parser::LoopControl::Bounds *bounds = | ||||||||||||||||
| std::get_if<parser::LoopControl::Bounds>(&loopControl->u); | ||||||||||||||||
| mlir::Operation *storeOp = | ||||||||||||||||
| setLoopVar(converter, loc, indexVal, bounds->name.thing.symbol); | ||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not quite sure whether we should use |
||||||||||||||||
| firOpBuilder.setInsertionPointAfter(storeOp); | ||||||||||||||||
| return scanOp; | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| static mlir::omp::SectionsOp | ||||||||||||||||
|
|
@@ -3416,7 +3456,7 @@ static void genOMPDispatch(lower::AbstractConverter &converter, | |||||||||||||||
| loc, queue, item); | ||||||||||||||||
| break; | ||||||||||||||||
| case llvm::omp::Directive::OMPD_scan: | ||||||||||||||||
| newOp = genScanOp(converter, symTable, semaCtx, loc, queue, item); | ||||||||||||||||
| newOp = genScanOp(converter, symTable, semaCtx, eval, loc, queue, item); | ||||||||||||||||
| break; | ||||||||||||||||
| case llvm::omp::Directive::OMPD_section: | ||||||||||||||||
| llvm_unreachable("genOMPDispatch: OMPD_section"); | ||||||||||||||||
|
|
||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| ! Tests scan reduction behavior when used in nested workshare loops | ||
|
|
||
| ! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s | ||
|
|
||
| program nested_scan_example | ||
| implicit none | ||
| integer, parameter :: n = 4, m = 5 | ||
| integer :: a(n, m), b(n, m) | ||
| integer :: i, j | ||
| integer :: row_sum, col_sum | ||
|
|
||
| do i = 1, n | ||
| do j = 1, m | ||
| a(i, j) = i + j | ||
| end do | ||
| end do | ||
|
|
||
| !$omp parallel do reduction(inscan, +: row_sum) private(col_sum, j) | ||
| do i = 1, n | ||
| row_sum = row_sum + i | ||
| !$omp scan inclusive(row_sum) | ||
|
|
||
| col_sum = 0 | ||
| !$omp parallel do reduction(inscan, +: col_sum) | ||
| do j = 1, m | ||
| col_sum = col_sum + a(i, j) | ||
| !CHECK: not yet implemented: Scan directive inside nested workshare loops | ||
| !$omp scan inclusive(col_sum) | ||
| b(i, j) = col_sum + row_sum | ||
| end do | ||
| !$omp end parallel do | ||
| end do | ||
| !$omp end parallel do | ||
| end program nested_scan_example |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| ! Tests scan reduction behavior when used in nested workshare loops | ||
|
|
||
| ! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s | ||
|
|
||
| program nested_loop_example | ||
| implicit none | ||
| integer :: i, j, x | ||
| integer, parameter :: N = 100, M = 200 | ||
| real :: A(N, M), B(N, M) | ||
| x = 0 | ||
|
|
||
| do i = 1, N | ||
| do j = 1, M | ||
| A(i, j) = i * j | ||
| end do | ||
| end do | ||
|
|
||
| !$omp parallel do collapse(2) reduction(inscan, +:x) | ||
| do i = 1, N | ||
| do j = 1, M | ||
| x = x + A(i,j) | ||
| !CHECK: not yet implemented: Scan directive inside nested workshare loops | ||
| !$omp scan inclusive(x) | ||
| B(i, j) = x | ||
| end do | ||
| end do | ||
| !$omp end parallel do | ||
|
|
||
| end program nested_loop_example |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.