-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Flang] Add code-object-version option #72638
[Flang] Add code-object-version option #72638
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-flang-driver Author: Dominik Adamski (DominikAdamski) Changes: Information about the code object version can be configured by the user for the AMD GPU target, and it needs to be placed in the LLVM IR generated by Flang. Information about the code object version in the MLIR generated by the parser can be reused by other tools. There is no need to specify extra flags if we want to invoke MLIR tools (like fir-opt) separately. Full diff: https://github.com/llvm/llvm-project/pull/72638.diff 10 Files Affected:
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 811550416110b3d..e7eb94d174e75f8 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4688,7 +4688,7 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
- Visibility<[ClangOption, CC1Option]>,
+ Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
Values<"none,4,5">,
NormalizedValuesScope<"TargetOptions">,
NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 8bdd920c3dcbb79..e60c11bfbe8e38b 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -204,6 +204,14 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args,
}
}
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
+ ArgStringList &CmdArgs) const {
+ if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
+ StringRef Val = A->getValue();
+ CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
+ }
+}
+
void Flang::addTargetOptions(const ArgList &Args,
ArgStringList &CmdArgs) const {
const ToolChain &TC = getToolChain();
@@ -227,6 +235,9 @@ void Flang::addTargetOptions(const ArgList &Args,
case llvm::Triple::r600:
case llvm::Triple::amdgcn:
+ getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
+ AddAMDGPUTargetArgs(Args, CmdArgs);
+ break;
case llvm::Triple::riscv64:
case llvm::Triple::x86_64:
getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h
index 0141240b5d3ac90..8d35080e1c0c88b 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -63,6 +63,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
llvm::opt::ArgStringList &CmdArgs) const;
+ /// Add specific options for AMDGPU target.
+ ///
+ /// \param [in] Args The list of input driver arguments
+ /// \param [out] CmdArgs The list of output command arguments
+ void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
+ llvm::opt::ArgStringList &CmdArgs) const;
+
/// Extract offload options from the driver arguments and add them to
/// the command arguments.
/// \param [in] C The current compilation for the driver invocation
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h b/flang/include/flang/Frontend/CodeGenOptions.h
index b86bb88610a9a4a..8d938c361a0aa23 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -85,6 +85,19 @@ class CodeGenOptions : public CodeGenOptionsBase {
RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
};
+ /// \brief Enumeration value for AMDGPU code object version, which is the
+ /// code object version times 100.
+ enum class CodeObjectVersionKind {
+ COV_None,
+ COV_2 = 200, // Unsupported.
+ COV_3 = 300, // Unsupported.
+ COV_4 = 400,
+ COV_5 = 500,
+ };
+
+ /// \brief Code object version for AMDGPU.
+ CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
+
/// Optimization remark with an optional regular expression pattern.
struct OptRemark {
RemarkKind Kind = RemarkKind::RK_Missing;
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index cb4f2d6a6225205..d96d313f85e8185 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -266,6 +266,15 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
opts.PrepareForThinLTO = true;
}
+ if (const llvm::opt::Arg *a = args.getLastArg(
+ clang::driver::options::OPT_mcode_object_version_EQ)) {
+ llvm::StringRef s = a->getValue();
+ if (s == "5")
+ opts.CodeObjectVersion = CodeGenOptions::CodeObjectVersionKind::COV_5;
+ if (s == "4")
+ opts.CodeObjectVersion = CodeGenOptions::CodeObjectVersionKind::COV_4;
+ }
+
// -f[no-]save-optimization-record[=<format>]
if (const llvm::opt::Arg *a =
args.getLastArg(clang::driver::options::OPT_opt_record_file))
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 1e4a2f3035f1f7f..6184f80eda34f53 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -244,7 +244,7 @@ static void setMLIRDataLayout(mlir::ModuleOp &mlirModule,
mlirModule->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec);
}
-static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
+static void addDependentLibs(mlir::ModuleOp &mlirModule,
CompilerInstance &ci) {
const std::vector<std::string> &libs =
ci.getInvocation().getCodeGenOpts().DependentLibs;
@@ -264,6 +264,37 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
}
}
+// Add to MLIR code target specific items which are dependent on target
+// configuration specified by the user
+static void addTargetSpecificMLIRItems(mlir::ModuleOp &mlirModule,
+ CompilerInstance &ci) {
+ const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+ const llvm::Triple triple(targetOpts.triple);
+ if (triple.isAMDGPU()) {
+ unsigned oclcABIVERsion;
+ const unsigned defaultOclcABIVERsion = 400;
+ mlir::OpBuilder builder(mlirModule.getContext());
+ const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts();
+ if (codeGenOpts.CodeObjectVersion ==
+ CodeGenOptions::CodeObjectVersionKind::COV_None)
+ oclcABIVERsion = defaultOclcABIVERsion;
+ else
+ oclcABIVERsion = static_cast<unsigned>(codeGenOpts.CodeObjectVersion);
+
+ auto int32Type = builder.getI32Type();
+ auto covInfo = builder.create<mlir::LLVM::GlobalOp>(
+ mlirModule.getLoc(), int32Type, true, mlir::LLVM::Linkage::WeakODR,
+ "__oclc_ABI_version",
+ builder.getIntegerAttr(int32Type, oclcABIVERsion));
+ covInfo.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local);
+ covInfo.setAddrSpace(4);
+ covInfo.setVisibility_(mlir::LLVM::Visibility::Hidden);
+ builder.setInsertionPointToStart(mlirModule.getBody());
+ builder.insert(covInfo);
+ }
+ addDependentLibs(mlirModule, ci);
+}
+
bool CodeGenAction::beginSourceFileAction() {
llvmCtx = std::make_unique<llvm::LLVMContext>();
CompilerInstance &ci = this->getInstance();
@@ -365,8 +396,9 @@ bool CodeGenAction::beginSourceFileAction() {
Fortran::parser::Program &parseTree{*ci.getParsing().parseTree()};
lb.lower(parseTree, ci.getInvocation().getSemanticsContext());
- // Add dependent libraries
- addDepdendentLibs(*mlirModule, ci);
+ // Add target specific items like dependent libraries, target specific
+ // constants etc.
+ addTargetSpecificMLIRItems(*mlirModule, ci);
// run the default passes.
mlir::PassManager pm((*mlirModule)->getName(),
diff --git a/flang/test/Driver/code-object-version.f90 b/flang/test/Driver/code-object-version.f90
new file mode 100644
index 000000000000000..cd88ff7fe29a282
--- /dev/null
+++ b/flang/test/Driver/code-object-version.f90
@@ -0,0 +1,8 @@
+! RUN: not %flang -target amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=3 -S %s -o \
+! RUN: /dev/null 2>&1 | FileCheck --check-prefix=INVALID_VERSION %s
+
+! RUN: %flang -target x86_64-unknown-linux-gnu -mcode-object-version=3 -S %s -o \
+! RUN: /dev/null 2>&1 | FileCheck --check-prefix=UNUSED_PARAM %s
+
+! INVALID_VERSION: error: invalid integral value '3' in '-mcode-object-version=3'
+! UNUSED_PARAM: warning: argument unused during compilation: '-mcode-object-version=3' [-Wunused-command-line-argument]
diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index b276f1906e1a457..4ea89776416a6b8 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -114,6 +114,8 @@
! CHECK-NEXT: -I <dir> Add directory to the end of the list of include search paths
! CHECK-NEXT: -L <dir> Add directory to library search path
! CHECK-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help'
+! CHECK-NEXT: -mcode-object-version=<value>
+! CHECK-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
! CHECK-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help'
! CHECK-NEXT: -mllvm=<arg> Alias for -mllvm
! CHECK-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing
diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index 452c62541e72e61..6fb306d3196fbae 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -100,6 +100,8 @@
! HELP-NEXT: -I <dir> Add directory to the end of the list of include search paths
! HELP-NEXT: -L <dir> Add directory to library search path
! HELP-NEXT: -march=<value> For a list of available architectures for the target use '-mcpu=help'
+! HELP-NEXT: -mcode-object-version=<value>
+! HELP-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
! HELP-NEXT: -mcpu=<value> For a list of available CPUs for the target use '-mcpu=help'
! HELP-NEXT: -mllvm=<arg> Alias for -mllvm
! HELP-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing
@@ -232,6 +234,8 @@
! HELP-FC1-NEXT: -init-only Only execute frontend initialization
! HELP-FC1-NEXT: -I <dir> Add directory to the end of the list of include search paths
! HELP-FC1-NEXT: -load <dsopath> Load the named plugin (dynamic shared object)
+! HELP-FC1-NEXT: -mcode-object-version=<value>
+! HELP-FC1-NEXT: Specify code object ABI version. Defaults to 4. (AMDGPU only)
! HELP-FC1-NEXT: -menable-no-infs Allow optimization to assume there are no infinities.
! HELP-FC1-NEXT: -menable-no-nans Allow optimization to assume there are no NaNs.
! HELP-FC1-NEXT: -mllvm <value> Additional arguments to forward to LLVM's option processing
diff --git a/flang/test/Lower/AMD/code_object_version.f90 b/flang/test/Lower/AMD/code_object_version.f90
new file mode 100644
index 000000000000000..160d2c9fb784e4c
--- /dev/null
+++ b/flang/test/Lower/AMD/code_object_version.f90
@@ -0,0 +1,11 @@
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 %s -o - | FileCheck --check-prefix=COV_NONE %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s
+
+!COV_NONE: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
+!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
+!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32
+subroutine target_simple
+end subroutine target_simple
+
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
eb2710b
to
e5d339c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for this work, Dominik. It LGTM, but please wait for another review before merging. I have a couple of suggestions for small improvements, but let me know if they would take significant effort to address, as I'm not completely familiar with that part of the project.
const llvm::Triple triple(targetOpts.triple); | ||
if (triple.isAMDGPU()) { | ||
unsigned oclcABIVERsion; | ||
const unsigned defaultOclcABIVERsion = 400; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be possible/advisable to have this constant somewhere shared between clang and flang, so they are guaranteed to default to the same ABI version?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
"__oclc_ABI_version", | ||
builder.getIntegerAttr(int32Type, oclcABIVERsion)); | ||
covInfo.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local); | ||
covInfo.setAddrSpace(4); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this address space value defined in any enumeration that could be used here, rather than passing just a number? Or, if the values are documented somewhere, at least refer to that documentation in a comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This address space is described here: https://llvm.org/docs/AMDGPUUsage.html#address-spaces . 4 corresponds to Constant Address space. There is an enum AddrSpace which describes the address spaces. It is defined as part of Clang TargetInfo: https://github.com/llvm/llvm-project/blob/main/clang/lib/Basic/Targets/AMDGPU.h .
I will move this enum to the llvm directory as the next step. For now I have only added a TODO comment, because TargetInfo is one of the basic Clang classes and I would like to do the move as a separate step in case of any regression.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All test files in this PR use hyphen (`-`) rather than underscore (`_`) ;-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
COV_2 = 200, // Unsupported. | ||
COV_3 = 300, // Unsupported. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does `COV` stand for and what does `unsupported` mean in this context? Why list any unsupported versions? Isn't it a bit like:
- `400` and `500` are supported, and
- everything else is not supported?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
COV_NONE means that no global item is emitted.
COV_2, COV_3 -> unsupported.
If the user does not specify a version, then the default version COV_4 is set.
See this page for more information about the code object version: https://llvm.org/docs/AMDGPUUsage.html#code-object-metadata .
BTW. Is it possible to set the default version for Flang in similar way as it is done for Clang? Could we use similar macro: MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4"> file ( https://github.com/llvm/llvm-project/blob/main/clang/include/clang/Driver/Options.td#L4716 ); and replace Clang struct TargetOpts with Flang struct ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW. Is it possible to set the default version for Flang in similar way as it is done for Clang?
Sadly, ATM, Flang does not use the "option marshaling" logic/macros. I am not aware of anyone working on this.
CompilerInstance &ci) { | ||
const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts(); | ||
const llvm::Triple triple(targetOpts.triple); | ||
if (triple.isAMDGPU()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please avoid indentation where possible (https://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code):
if (triple.isAMDGPU()) { | |
if (! triple.isAMDGPU()) | |
return |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
builder.setInsertionPointToStart(mlirModule.getBody()); | ||
builder.insert(covInfo); | ||
} | ||
addDependentLibs(mlirModule, ci); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
My terminology might be off, but:
- most of this function adds something specific to a particular hardware target (i.e. AMDGPU),
- `addDependentLibs` is something linked to a particular platform (i.e. MSVC/Windows).
My suggestion:
- keep `addDependentLibs` as an independent hook,
- rename `addTargetSpecificMLIRItems` as `addAMDGPUSpecificMLIRItems` (we can rename this later if people want to add support for other targets).
@jsjodin - is that consistent with what you had in mind?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
/// \brief Enumeration value for AMDGPU code object version, which is the | ||
/// code object version times 100. | ||
enum class CodeObjectVersionKind { | ||
COV_None, | ||
COV_2 = 200, // Unsupported. | ||
COV_3 = 300, // Unsupported. | ||
COV_4 = 400, | ||
COV_5 = 500, | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If possible, move this to `llvm/include/llvm/Frontend/Driver/TargetOptions.h`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I moved it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for addressing my comments, the driver changes LGTM!
Please wait for other reviewers to approve before landing this. In particular, I am not qualified to tell whether the bits specific to AMD GPUs are correct :)
/// \brief Enumeration value for AMDGPU code object version, which is the | ||
/// code object version times 100. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nit]
/// \brief Enumeration value for AMDGPU code object version, which is the | |
/// code object version times 100. | |
/// \brief Enumeration value for AMDGPU code object version (COV), which is the | |
/// code object version times 100. |
COV_2 = 200, // Unsupported. | ||
COV_3 = 300, // Unsupported. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BTW. Is it possible to set the default version for Flang in similar way as it is done for Clang?
Sadly, ATM, Flang does not use the "option marshaling" logic/macros. I am not aware of anyone working on this.
Information about code object version can be configured by the user for AMD GPU target and it needs to be placed in LLVM IR generated by Flang. Information about code object version in MLIR generated by the parser can be reused by other tools. There is no need to specify extra flags if we want to invoke MLIR tools separately.
c76a13a
to
0488b44
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This commit causes test errors on buildbots. This reverts commit a8ac930.
Information about code object version can be configured by the user for AMD GPU target and it needs to be placed in LLVM IR generated by Flang. Information about code object version in MLIR generated by the parser can be reused by other tools. There is no need to specify extra flags if we want to invoke MLIR tools (like fir-opt) separately. Changes in comparison to a8ac93: * added information about required targets for test flang/test/Driver/driver-help.f90
Information about code object version can be configured by the user for AMD GPU target and it needs to be placed in LLVM IR generated by Flang.
Information about code object version in MLIR generated by the parser can be reused by other tools. There is no need to specify extra flags if we want to invoke MLIR tools (like fir-opt) separately.