diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h
index ba3acd0295871..2049f03b28893 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -78,17 +78,9 @@ class TargetOptions {
   /// \brief If enabled, allow AMDGPU unsafe floating point atomics.
   bool AllowAMDGPUUnsafeFPAtomics = false;
 
-  /// \brief Enumeration value for AMDGPU code object version, which is the
-  /// code object version times 100.
-  enum CodeObjectVersionKind {
-    COV_None,
-    COV_2 = 200, // Unsupported.
-    COV_3 = 300, // Unsupported.
-    COV_4 = 400,
-    COV_5 = 500,
-  };
   /// \brief Code object version for AMDGPU.
-  CodeObjectVersionKind CodeObjectVersion = CodeObjectVersionKind::COV_None;
+  llvm::CodeObjectVersionKind CodeObjectVersion =
+      llvm::CodeObjectVersionKind::COV_None;
 
   /// \brief Enumeration values for AMDGPU printf lowering scheme
   enum class AMDGPUPrintfKind {
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9689f12fd0141..7dd2755350f7a 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4721,9 +4721,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
-  Visibility<[ClangOption, CC1Option]>,
+  Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
   Values<"none,4,5">,
-  NormalizedValuesScope<"TargetOptions">,
+  NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
   NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
   MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c83ea966fdead..65d9862621061 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17588,7 +17588,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
 
   auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
 
-  if (Cov == clang::TargetOptions::COV_None) {
+  if (Cov == CodeObjectVersionKind::COV_None) {
     StringRef Name = "__oclc_ABI_version";
     auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
     if (!ABIVersionC)
@@ -17606,7 +17606,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
 
     Value *IsCOV5 = CGF.Builder.CreateICmpSGE(
         ABIVersion,
-        llvm::ConstantInt::get(CGF.Int32Ty, clang::TargetOptions::COV_5));
+        llvm::ConstantInt::get(CGF.Int32Ty, CodeObjectVersionKind::COV_5));
 
     // Indexing the implicit kernarg segment.
     Value *ImplicitGEP = CGF.Builder.CreateConstGEP1_32(
@@ -17621,7 +17621,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
         Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
   } else {
     Value *GEP = nullptr;
-    if (Cov == clang::TargetOptions::COV_5) {
+    if (Cov == CodeObjectVersionKind::COV_5) {
       // Indexing the implicit kernarg segment.
       GEP = CGF.Builder.CreateConstGEP1_32(
           CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 41ff4a992f194..3225c98476865 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -847,7 +847,7 @@ void CodeGenModule::Release() {
     // Emit amdgpu_code_object_version module flag, which is code object version
     // times 100.
     if (getTarget().getTargetOpts().CodeObjectVersion !=
-        TargetOptions::COV_None) {
+        llvm::CodeObjectVersionKind::COV_None) {
       getModule().addModuleFlag(llvm::Module::Error,
                                 "amdgpu_code_object_version",
                                 getTarget().getTargetOpts().CodeObjectVersion);
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4dd25213dda9f..b654e3f12af8d 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -368,7 +368,7 @@ void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
     return;
 
   if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
-      clang::TargetOptions::COV_None)
+      llvm::CodeObjectVersionKind::COV_None)
     return;
 
   auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 86e1c57e48568..a6fa94defa5e2 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -277,6 +277,14 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args,
   }
 }
 
+void Flang::AddAMDGPUTargetArgs(const ArgList &Args,
+                                ArgStringList &CmdArgs) const {
+  if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) {
+    StringRef Val = A->getValue();
+    CmdArgs.push_back(Args.MakeArgString("-mcode-object-version=" + Val));
+  }
+}
+
 void Flang::addTargetOptions(const ArgList &Args,
                              ArgStringList &CmdArgs) const {
   const ToolChain &TC = getToolChain();
@@ -300,6 +308,9 @@ void Flang::addTargetOptions(const ArgList &Args,
 
   case llvm::Triple::r600:
   case llvm::Triple::amdgcn:
+    getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
+    AddAMDGPUTargetArgs(Args, CmdArgs);
+    break;
   case llvm::Triple::riscv64:
   case llvm::Triple::x86_64:
     getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h
index 0141240b5d3ac..8d35080e1c0c8 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -63,6 +63,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
   void AddAArch64TargetArgs(const llvm::opt::ArgList &Args,
                             llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Add specific options for AMDGPU target.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args,
+                           llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract offload options from the driver arguments and add them to
   /// the command arguments.
   /// \param [in] C The current compilation for the driver invocation
diff --git a/flang/include/flang/Frontend/CodeGenOptions.h b/flang/include/flang/Frontend/CodeGenOptions.h
index b86bb88610a9a..0c318e4023af4 100644
--- a/flang/include/flang/Frontend/CodeGenOptions.h
+++ b/flang/include/flang/Frontend/CodeGenOptions.h
@@ -85,6 +85,10 @@ class CodeGenOptions : public CodeGenOptionsBase {
     RK_WithPattern, // Remark pattern specified via '-Rgroup=regexp'.
   };
 
+  /// \brief Code object version for AMDGPU.
+  llvm::CodeObjectVersionKind CodeObjectVersion =
+      llvm::CodeObjectVersionKind::COV_4;
+
   /// Optimization remark with an optional regular expression pattern.
   struct OptRemark {
     RemarkKind Kind = RemarkKind::RK_Missing;
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 1c09ae9c281eb..0dc11abcbd7ba 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -268,6 +268,18 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
       opts.PrepareForThinLTO = true;
   }
 
+  // -mcode-object-version=<none|4|5>; any other value keeps the default.
+  if (const llvm::opt::Arg *a = args.getLastArg(
+          clang::driver::options::OPT_mcode_object_version_EQ)) {
+    llvm::StringRef s = a->getValue();
+    if (s == "5")
+      opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_5;
+    else if (s == "4")
+      opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_4;
+    else if (s == "none")
+      opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_None;
+  }
+
   // -f[no-]save-optimization-record[=<format>]
   if (const llvm::opt::Arg *a =
           args.getLastArg(clang::driver::options::OPT_opt_record_file))
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index f573ac82c91cd..6663548cc81b6 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -244,8 +244,7 @@ static void setMLIRDataLayout(mlir::ModuleOp &mlirModule,
   mlirModule->setAttr(mlir::DLTIDialect::kDataLayoutAttrName, dlSpec);
 }
 
-static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
-                              CompilerInstance &ci) {
+static void addDependentLibs(mlir::ModuleOp &mlirModule, CompilerInstance &ci) {
   const std::vector<std::string> &libs =
       ci.getInvocation().getCodeGenOpts().DependentLibs;
   if (libs.empty()) {
@@ -264,6 +263,68 @@ static void addDepdendentLibs(mlir::ModuleOp &mlirModule,
   }
 }
 
+// Add to MLIR code target specific items which are dependent on target
+// configuration specified by the user.
+// Clang equivalent function: AMDGPUTargetCodeGenInfo::emitTargetGlobals
+static void addAMDGPUSpecificMLIRItems(mlir::ModuleOp &mlirModule,
+                                       CompilerInstance &ci) {
+  const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts();
+  const llvm::Triple triple(targetOpts.triple);
+  const llvm::StringRef codeObjectVersionGlobalOpName = "__oclc_ABI_version";
+
+  // TODO: Share address spaces enumeration between Clang and Flang.
+  // Currently this enumeration is defined in a Clang specific class
+  // in clang/lib/Basic/Targets/AMDGPU.h and needs to be moved to the
+  // LLVM directory.
+  const int constantAddressSpace = 4;
+
+  if (!triple.isAMDGPU()) {
+    return;
+  }
+  const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts();
+  if (codeGenOpts.CodeObjectVersion == llvm::CodeObjectVersionKind::COV_None) {
+    return;
+  }
+
+  mlir::OpBuilder builder(mlirModule.getContext());
+  unsigned oclcABIVersion = codeGenOpts.CodeObjectVersion;
+  auto int32Type = builder.getI32Type();
+
+  std::optional<mlir::LLVM::GlobalOp> originalGV;
+
+  mlirModule.walk([&originalGV, codeObjectVersionGlobalOpName](
+                      mlir::LLVM::GlobalOp globalOp) {
+    if (globalOp.getName() == codeObjectVersionGlobalOpName)
+      originalGV = globalOp;
+  });
+  if (originalGV.has_value()) {
+    mlir::LLVM::GlobalOp originalGVOp = originalGV.value();
+    if (originalGVOp.getLinkage() != mlir::LLVM::Linkage::External) {
+      return;
+    }
+    // The global is already present in MLIR with external linkage: update it
+    // in place so that it carries the requested ABI version.
+    originalGVOp.setLinkage(mlir::LLVM::Linkage::WeakODR);
+    originalGVOp.setValueAttr(
+        builder.getIntegerAttr(int32Type, oclcABIVersion));
+    originalGVOp.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local);
+    originalGVOp.setAddrSpace(constantAddressSpace);
+    originalGVOp.setVisibility_(mlir::LLVM::Visibility::Hidden);
+    return;
+  }
+
+  mlir::LLVM::GlobalOp covInfo = builder.create<mlir::LLVM::GlobalOp>(
+      /* Location */ mlirModule.getLoc(), /* Type */ int32Type,
+      /* IsConstant */ true, /* Linkage */ mlir::LLVM::Linkage::WeakODR,
+      /* Name */ codeObjectVersionGlobalOpName,
+      /* Value */ builder.getIntegerAttr(int32Type, oclcABIVersion));
+  covInfo.setUnnamedAddr(mlir::LLVM::UnnamedAddr::Local);
+  covInfo.setAddrSpace(constantAddressSpace);
+  covInfo.setVisibility_(mlir::LLVM::Visibility::Hidden);
+  builder.setInsertionPointToStart(mlirModule.getBody());
+  builder.insert(covInfo);
+}
+
 bool CodeGenAction::beginSourceFileAction() {
   llvmCtx = std::make_unique<llvm::LLVMContext>();
   CompilerInstance &ci = this->getInstance();
@@ -365,8 +426,10 @@ bool CodeGenAction::beginSourceFileAction() {
   Fortran::parser::Program &parseTree{*ci.getParsing().parseTree()};
   lb.lower(parseTree, ci.getInvocation().getSemanticsContext());
 
-  // Add dependent libraries
-  addDepdendentLibs(*mlirModule, ci);
+  // Add target specific items like dependent libraries, target specific
+  // constants etc.
+  addDependentLibs(*mlirModule, ci);
+  addAMDGPUSpecificMLIRItems(*mlirModule, ci);
 
   // run the default passes.
   mlir::PassManager pm((*mlirModule)->getName(),
diff --git a/flang/test/Driver/code-object-version.f90 b/flang/test/Driver/code-object-version.f90
new file mode 100644
index 0000000000000..cd88ff7fe29a2
--- /dev/null
+++ b/flang/test/Driver/code-object-version.f90
@@ -0,0 +1,8 @@
+! RUN: not %flang -target amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=3 -S %s -o \
+! RUN: /dev/null 2>&1 | FileCheck --check-prefix=INVALID_VERSION %s
+
+! RUN: %flang -target x86_64-unknown-linux-gnu -mcode-object-version=3 -S %s -o \
+! RUN: /dev/null 2>&1 | FileCheck --check-prefix=UNUSED_PARAM %s
+
+! INVALID_VERSION: error: invalid integral value '3' in '-mcode-object-version=3'
+! UNUSED_PARAM: warning: argument unused during compilation: '-mcode-object-version=3' [-Wunused-command-line-argument]
diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90
index b276f1906e1a4..4ea89776416a6 100644
--- a/flang/test/Driver/driver-help-hidden.f90
+++ b/flang/test/Driver/driver-help-hidden.f90
@@ -114,6 +114,8 @@
 ! CHECK-NEXT: -I <dir>                Add directory to the end of the list of include search paths
 ! CHECK-NEXT: -L <dir>                Add directory to library search path
 ! CHECK-NEXT: -march=<value>          For a list of available architectures for the target use '-mcpu=help'
+! CHECK-NEXT: -mcode-object-version=<value>
+! CHECK-NEXT:                         Specify code object ABI version. Defaults to 4. (AMDGPU only)
 ! CHECK-NEXT: -mcpu=<value>           For a list of available CPUs for the target use '-mcpu=help'
 ! CHECK-NEXT: -mllvm=<arg>            Alias for -mllvm
 ! CHECK-NEXT: -mllvm <value>          Additional arguments to forward to LLVM's option processing
diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90
index 452c62541e72e..6fb306d3196fb 100644
--- a/flang/test/Driver/driver-help.f90
+++ b/flang/test/Driver/driver-help.f90
@@ -100,6 +100,8 @@
 ! HELP-NEXT: -I <dir>                Add directory to the end of the list of include search paths
 ! HELP-NEXT: -L <dir>                Add directory to library search path
 ! HELP-NEXT: -march=<value>          For a list of available architectures for the target use '-mcpu=help'
+! HELP-NEXT: -mcode-object-version=<value>
+! HELP-NEXT:                         Specify code object ABI version. Defaults to 4. (AMDGPU only)
 ! HELP-NEXT: -mcpu=<value>           For a list of available CPUs for the target use '-mcpu=help'
 ! HELP-NEXT: -mllvm=<arg>            Alias for -mllvm
 ! HELP-NEXT: -mllvm <value>          Additional arguments to forward to LLVM's option processing
@@ -232,6 +234,8 @@
 ! HELP-FC1-NEXT: -init-only              Only execute frontend initialization
 ! HELP-FC1-NEXT: -I <dir>                Add directory to the end of the list of include search paths
 ! HELP-FC1-NEXT: -load <dsopath>         Load the named plugin (dynamic shared object)
+! HELP-FC1-NEXT: -mcode-object-version=<value>
+! HELP-FC1-NEXT:                         Specify code object ABI version. Defaults to 4. (AMDGPU only)
 ! HELP-FC1-NEXT: -menable-no-infs        Allow optimization to assume there are no infinities.
 ! HELP-FC1-NEXT: -menable-no-nans        Allow optimization to assume there are no NaNs.
 ! HELP-FC1-NEXT: -mllvm <value>          Additional arguments to forward to LLVM's option processing
diff --git a/flang/test/Lower/AMD/code-object-version.f90 b/flang/test/Lower/AMD/code-object-version.f90
new file mode 100644
index 0000000000000..7cb9dc079724e
--- /dev/null
+++ b/flang/test/Lower/AMD/code-object-version.f90
@@ -0,0 +1,13 @@
+!REQUIRES: amdgpu-registered-target
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 %s -o - | FileCheck --check-prefix=COV_DEFAULT %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s
+
+!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
+!COV_NONE-NOT: @__oclc_ABI_version
+!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
+!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32
+subroutine target_simple
+end subroutine target_simple
+
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index d6d767f3d22c7..4df897c047a38 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -121,6 +121,16 @@ namespace llvm {
     Never,
   };
 
+  /// \brief Enumeration value for AMDGPU code object version, which is the
+  /// code object version times 100.
+  enum CodeObjectVersionKind {
+    COV_None,
+    COV_2 = 200, // Unsupported.
+    COV_3 = 300, // Unsupported.
+    COV_4 = 400,
+    COV_5 = 500,
+  };
+
   class TargetOptions {
   public:
     TargetOptions()