-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[OpenACC][CIR] Implement 'gang' lowering on `routine' #170506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This is a bit more work than the worker/vector/seq in that gang takes an optional `dim` argument. The argument is always 1, 2, or 3 (constants!), and the other argument-types that gang allows elsewhere aren't valid here. For the IR, we had to add 2 overloads of `addGang`. The first just adds the 'valueless' one, which can just add to the one ArrayAttr. The second has to add to TWO lists. Note: The standard limits to only 1 `gang` per construct. We decided after evaluating it, that it really means 'per device-type region'. However, device_type isn't implemented yet, so we'll add tests for that when we do. At the moment, we added the device_type infrastructure however.
|
@llvm/pr-subscribers-openacc @llvm/pr-subscribers-mlir Author: Erich Keane (erichkeane) ChangesThis is a bit more work than the worker/vector/seq in that gang takes an optional For the IR, we had to add 2 overloads of Note: The standard limits to only 1 At the moment, we added the device_type infrastructure however. Full diff: https://github.com/llvm/llvm-project/pull/170506.diff 4 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
index a5322ac4e1930..0d76587dd48b1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenDeclOpenACC.cpp
@@ -303,14 +303,16 @@ void CIRGenModule::emitGlobalOpenACCRoutineDecl(const OpenACCRoutineDecl *d) {
namespace {
class OpenACCRoutineClauseEmitter final
: public OpenACCClauseVisitor<OpenACCRoutineClauseEmitter> {
+ CIRGenModule &cgm;
CIRGen::CIRGenBuilderTy &builder;
mlir::acc::RoutineOp routineOp;
llvm::SmallVector<mlir::acc::DeviceType> lastDeviceTypeValues;
public:
- OpenACCRoutineClauseEmitter(CIRGen::CIRGenBuilderTy &builder,
+ OpenACCRoutineClauseEmitter(CIRGenModule &cgm,
+ CIRGen::CIRGenBuilderTy &builder,
mlir::acc::RoutineOp routineOp)
- : builder(builder), routineOp(routineOp) {}
+ : cgm(cgm), builder(builder), routineOp(routineOp) {}
void emitClauses(ArrayRef<const OpenACCClause *> clauses) {
this->VisitClauseList(clauses);
@@ -333,6 +335,26 @@ class OpenACCRoutineClauseEmitter final
void VisitNoHostClause(const OpenACCNoHostClause &clause) {
routineOp.setNohost(/*attrValue=*/true);
}
+
+ void VisitGangClause(const OpenACCGangClause &clause) {
+ // Gang has an optional 'dim' value, which is a constant int of 1, 2, or 3.
+ // If we don't store any expressions in the clause, there are none, else we
+ // expect there is 1, since Sema should enforce that the single 'dim' is the
+ // only valid value.
+ if (clause.getNumExprs() == 0) {
+ routineOp.addGang(builder.getContext(), lastDeviceTypeValues);
+ } else {
+ assert(clause.getNumExprs() == 1);
+ auto [kind, expr] = clause.getExpr(0);
+ assert(kind == OpenACCGangKind::Dim);
+
+ llvm::APSInt curValue = expr->EvaluateKnownConstInt(cgm.getASTContext());
+ // The value is 1, 2, or 3, but 64 bit seems right enough.
+ curValue = curValue.sextOrTrunc(64);
+ routineOp.addGang(builder.getContext(), lastDeviceTypeValues,
+ curValue.getZExtValue());
+ }
+ }
};
} // namespace
@@ -373,6 +395,6 @@ void CIRGenModule::emitOpenACCRoutineDecl(
mlir::acc::getRoutineInfoAttrName(),
mlir::acc::RoutineInfoAttr::get(func.getContext(), funcRoutines));
- OpenACCRoutineClauseEmitter emitter{builder, routineOp};
+ OpenACCRoutineClauseEmitter emitter{*this, builder, routineOp};
emitter.emitClauses(clauses);
}
diff --git a/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp
index 81437e7e02ab1..6500b07ff1eb7 100644
--- a/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/routine-clauses.cpp
@@ -18,6 +18,27 @@ void Func5() {}
void Func6() {}
#pragma acc routine(Func6) nohost vector
+#pragma acc routine gang
+void Func7() {}
+
+void Func8() {}
+#pragma acc routine(Func8) gang
+
+#pragma acc routine gang(dim:1)
+void Func9() {}
+
+void Func10() {}
+#pragma acc routine(Func10) gang(dim:3)
+
+constexpr int Value = 2;
+
+#pragma acc routine gang(dim:Value) nohost
+void Func11() {}
+
+
+void Func12() {}
+#pragma acc routine(Func12) nohost gang(dim:Value)
+
// CHECK: cir.func{{.*}} @[[F1_NAME:.*Func1[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F1_R_NAME:.*]]]>}
// CHECK: acc.routine @[[F1_R_NAME]] func(@[[F1_NAME]]) seq nohost
@@ -32,7 +53,25 @@ void Func6() {}
// CHECK: acc.routine @[[F5_R_NAME]] func(@[[F5_NAME]]) vector
// CHECK: cir.func{{.*}} @[[F6_NAME:.*Func6[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F6_R_NAME:.*]]]>}
+//
+// CHECK: cir.func{{.*}} @[[F7_NAME:.*Func7[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F7_R_NAME:.*]]]>}
+// CHECK: acc.routine @[[F7_R_NAME]] func(@[[F7_NAME]]) gang
+//
+// CHECK: cir.func{{.*}} @[[F8_NAME:.*Func8[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F8_R_NAME:.*]]]>}
+//
+// CHECK: cir.func{{.*}} @[[F9_NAME:.*Func9[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F9_R_NAME:.*]]]>}
+// CHECK: acc.routine @[[F9_R_NAME]] func(@[[F9_NAME]]) gang(dim: 1 : i64)
+//
+// CHECK: cir.func{{.*}} @[[F10_NAME:.*Func10[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F10_R_NAME:.*]]]>}
+
+// CHECK: cir.func{{.*}} @[[F11_NAME:.*Func11[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F11_R_NAME:.*]]]>}
+// CHECK: acc.routine @[[F11_R_NAME]] func(@[[F11_NAME]]) gang(dim: 2 : i64)
+//
+// CHECK: cir.func{{.*}} @[[F12_NAME:.*Func12[^\(]*]]({{.*}}){{.*}} attributes {acc.routine_info = #acc.routine_info<[@[[F12_R_NAME:.*]]]>}
// CHECK: acc.routine @[[F2_R_NAME]] func(@[[F2_NAME]]) seq
// CHECK: acc.routine @[[F4_R_NAME]] func(@[[F4_NAME]]) worker nohost
// CHECK: acc.routine @[[F6_R_NAME]] func(@[[F6_NAME]]) vector nohost
+// CHECK: acc.routine @[[F8_R_NAME]] func(@[[F8_NAME]]) gang
+// CHECK: acc.routine @[[F10_R_NAME]] func(@[[F10_NAME]]) gang(dim: 3 : i64)
+// CHECK: acc.routine @[[F12_R_NAME]] func(@[[F12_NAME]]) gang(dim: 2 : i64)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 77d1a6f8d53b5..be50d38689218 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -3286,6 +3286,11 @@ def OpenACC_RoutineOp : OpenACC_Op<"routine", [IsolatedFromAbove]> {
void addVector(MLIRContext *, llvm::ArrayRef<DeviceType>);
// Add an entry to the 'worker' attribute for each additional device types.
void addWorker(MLIRContext *, llvm::ArrayRef<DeviceType>);
+ // Add an entry to the 'gang' attribute for each additional device type.
+ void addGang(MLIRContext *, llvm::ArrayRef<DeviceType>);
+ // Add an entry to the 'gang' attribute with a value for each additional
+ // device type.
+ void addGang(MLIRContext *, llvm::ArrayRef<DeviceType>, uint64_t);
}];
let assemblyFormat = [{
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 7039bbe1d11ec..e3614118b5ad6 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -4367,6 +4367,43 @@ void RoutineOp::addWorker(MLIRContext *context,
effectiveDeviceTypes));
}
+void RoutineOp::addGang(MLIRContext *context,
+ llvm::ArrayRef<DeviceType> effectiveDeviceTypes) {
+ setGangAttr(addDeviceTypeAffectedOperandHelper(context, getGangAttr(),
+ effectiveDeviceTypes));
+}
+
+void RoutineOp::addGang(MLIRContext *context,
+ llvm::ArrayRef<DeviceType> effectiveDeviceTypes,
+ uint64_t val) {
+ llvm::SmallVector<mlir::Attribute> dimValues;
+ llvm::SmallVector<mlir::Attribute> deviceTypes;
+
+ if (getGangDimAttr())
+ llvm::copy(getGangDimAttr(), std::back_inserter(dimValues));
+ if (getGangDimDeviceTypeAttr())
+ llvm::copy(getGangDimDeviceTypeAttr(), std::back_inserter(deviceTypes));
+
+ assert(dimValues.size() == deviceTypes.size());
+
+ if (effectiveDeviceTypes.empty()) {
+ dimValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), val));
+ deviceTypes.push_back(
+ acc::DeviceTypeAttr::get(context, acc::DeviceType::None));
+ } else {
+ for (DeviceType dt : effectiveDeviceTypes) {
+ dimValues.push_back(
+ mlir::IntegerAttr::get(mlir::IntegerType::get(context, 64), val));
+ deviceTypes.push_back(acc::DeviceTypeAttr::get(context, dt));
+ }
+ }
+ assert(dimValues.size() == deviceTypes.size());
+
+ setGangDimAttr(mlir::ArrayAttr::get(context, dimValues));
+ setGangDimDeviceTypeAttr(mlir::ArrayAttr::get(context, deviceTypes));
+}
+
//===----------------------------------------------------------------------===//
// InitOp
//===----------------------------------------------------------------------===//
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
@razvanlupusoru : would love your review of the ACC dialect changes. |
Nice improvement to add this API. |
This is a bit more work than the worker/vector/seq in that gang takes an optional `dim` argument. The argument is always 1, 2, or 3 (constants!), and the other argument-types that gang allows elsewhere aren't valid here. For the IR, we had to add 2 overloads of `addGang`. The first just adds the 'valueless' one, which can just add to the one ArrayAttr. The second has to add to TWO lists. Note: The standard limits to only 1 `gang` per construct. We decided after evaluating it, that it really means 'per device-type region'. However, device_type isn't implemented yet, so we'll add tests for that when we do. At the moment, we added the device_type infrastructure however.
This is a bit more work than the worker/vector/seq in that gang takes an optional
dimargument. The argument is always 1, 2, or 3 (constants!), and the other argument-types that gang allows elsewhere aren't valid here.For the IR, we had to add 2 overloads of
addGang. The first just adds the 'valueless' one, which can just add to the one ArrayAttr. The second has to add to TWO lists.Note: The standard limits to only 1
gangper construct. We decided after evaluating it, that it really means 'per device-type region'. However, device_type isn't implemented yet, so we'll add tests for that when we do.At the moment, we added the device_type infrastructure however.