Skip to content

Commit

Permalink
[CUDA][HIP] Add -Xarch_device and -Xarch_host options
Browse files Browse the repository at this point in the history
The argument after -Xarch_device will be added to the arguments for CUDA/HIP
device compilation and will be removed for host compilation.

The argument after -Xarch_host will be added to the arguments for CUDA/HIP
host compilation and will be removed for device compilation.

Differential Revision: https://reviews.llvm.org/D76520
  • Loading branch information
yxsamliu committed Mar 24, 2020
1 parent d381b6a commit 2ae2564
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 24 deletions.
4 changes: 4 additions & 0 deletions clang/include/clang/Driver/Options.td
Expand Up @@ -466,6 +466,10 @@ def Xanalyzer : Separate<["-"], "Xanalyzer">,
HelpText<"Pass <arg> to the static analyzer">, MetaVarName<"<arg>">,
Group<StaticAnalyzer_Group>;
def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[DriverOption]>;
// -Xarch_host <arg>: takes one separate argument and carries the DriverOption
// flag. <arg> is added to the arguments for the CUDA/HIP host compilation and
// removed for the device compilation.
def Xarch_host : Separate<["-"], "Xarch_host">, Flags<[DriverOption]>,
HelpText<"Pass <arg> to the CUDA/HIP host compilation">, MetaVarName<"<arg>">;
// -Xarch_device <arg>: takes one separate argument and carries the
// DriverOption flag. <arg> is added to the arguments for the CUDA/HIP device
// compilation and removed for the host compilation.
def Xarch_device : Separate<["-"], "Xarch_device">, Flags<[DriverOption]>,
HelpText<"Pass <arg> to the CUDA/HIP device compilation">, MetaVarName<"<arg>">;
def Xassembler : Separate<["-"], "Xassembler">,
HelpText<"Pass <arg> to the assembler">, MetaVarName<"<arg>">,
Group<CompileOnly_Group>;
Expand Down
18 changes: 14 additions & 4 deletions clang/include/clang/Driver/ToolChain.h
Expand Up @@ -296,10 +296,20 @@ class ToolChain {
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs) const;

/// Append the argument following \p A to \p DAL assuming \p A is an Xarch
/// argument.
virtual void TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL) const;
/// argument. If \p AllocatedArgs is a null pointer, synthesized arguments
/// are added to \p DAL; otherwise they are appended to \p AllocatedArgs.
virtual void TranslateXarchArgs(
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL,
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs = nullptr) const;

/// Translate the -Xarch_host, -Xarch_device and -Xarch_<arch> arguments in
/// \p Args for one compilation.
///
/// \param Args the argument list to scan.
/// \param BoundArch the architecture being compiled for; -Xarch_<arch>
///        arguments are kept only when <arch> equals this value.
/// \param DeviceOffloadKind the offload kind of the compilation. CUDA and HIP
///        are treated as device compilations; every other kind is treated as
///        a host compilation.
/// \param AllocatedArgs if non-null, receives the arguments synthesized
///        during translation; if null, they are added to the returned list.
///
/// \return a null pointer if no argument had to be translated or dropped;
///         otherwise a newly allocated DerivedArgList holding the resulting
///         arguments, which the caller is responsible for deleting.
virtual llvm::opt::DerivedArgList *
TranslateXarchArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
Action::OffloadKind DeviceOffloadKind,
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const;

/// Choose a tool to use to handle the action \p JA.
///
Expand Down
19 changes: 16 additions & 3 deletions clang/lib/Driver/Compilation.cpp
Expand Up @@ -76,16 +76,29 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
*TranslatedArgs, SameTripleAsHost, AllocatedArgs);
}

DerivedArgList *NewDAL = nullptr;
if (!OpenMPArgs) {
NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch,
DeviceOffloadKind, &AllocatedArgs);
} else {
NewDAL = TC->TranslateXarchArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind,
&AllocatedArgs);
if (!NewDAL)
NewDAL = OpenMPArgs;
else
delete OpenMPArgs;
}

if (!NewDAL) {
Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind);
if (!Entry)
Entry = TranslatedArgs;
} else {
Entry = TC->TranslateArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind);
Entry = TC->TranslateArgs(*NewDAL, BoundArch, DeviceOffloadKind);
if (!Entry)
Entry = OpenMPArgs;
Entry = NewDAL;
else
delete OpenMPArgs;
delete NewDAL;
}

// Add allocated arguments to the final DAL.
Expand Down
63 changes: 58 additions & 5 deletions clang/lib/Driver/ToolChain.cpp
Expand Up @@ -1103,11 +1103,20 @@ llvm::opt::DerivedArgList *ToolChain::TranslateOpenMPTargetArgs(
return nullptr;
}

void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL) const {
// TODO: Currently argument values separated by space e.g.
// -Xclang -mframe-pointer=no cannot be passed by -Xarch_. This should be
// fixed.
void ToolChain::TranslateXarchArgs(
const llvm::opt::DerivedArgList &Args, llvm::opt::Arg *&A,
llvm::opt::DerivedArgList *DAL,
SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
const OptTable &Opts = getDriver().getOpts();
unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
unsigned ValuePos = 1;
if (A->getOption().matches(options::OPT_Xarch_device) ||
A->getOption().matches(options::OPT_Xarch_host))
ValuePos = 0;

unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(ValuePos));
unsigned Prev = Index;
std::unique_ptr<llvm::opt::Arg> XarchArg(Opts.ParseOneArg(Args, Index));

Expand All @@ -1130,5 +1139,49 @@ void ToolChain::TranslateXarchArgs(const llvm::opt::DerivedArgList &Args,
}
XarchArg->setBaseArg(A);
A = XarchArg.release();
DAL->AddSynthesizedArg(A);
if (!AllocatedArgs)
DAL->AddSynthesizedArg(A);
else
AllocatedArgs->push_back(A);
}

/// Resolve the -Xarch_host, -Xarch_device and -Xarch_<arch> arguments found
/// in \p Args for the compilation identified by \p BoundArch and \p OFK.
///
/// Each such argument is either replaced by the argument it wraps or dropped;
/// every other argument is carried over unchanged. A CUDA or HIP offload kind
/// counts as a device compilation, anything else as a host compilation.
///
/// \returns a heap-allocated DerivedArgList (to be deleted by the caller) if
/// at least one argument was replaced or dropped, and a null pointer if the
/// list would be identical to \p Args.
llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs(
    const llvm::opt::DerivedArgList &Args, StringRef BoundArch,
    Action::OffloadKind OFK,
    SmallVectorImpl<llvm::opt::Arg *> *AllocatedArgs) const {
  const bool ForDevice = OFK == Action::OFK_Cuda || OFK == Action::OFK_HIP;

  DerivedArgList *Result = new DerivedArgList(Args.getBaseArgs());
  bool Changed = false;

  for (Arg *A : Args) {
    // Classify the argument: unwrap it, discard it, or leave both flags
    // clear to pass it through untouched. The two flags are never both set.
    bool Unwrap = false;
    bool Discard = false;

    const auto &Opt = A->getOption();
    if (Opt.matches(options::OPT_Xarch_device)) {
      if (ForDevice)
        Unwrap = true;
      else
        Discard = true;
    } else if (Opt.matches(options::OPT_Xarch_host)) {
      if (ForDevice)
        Discard = true;
      else
        Unwrap = true;
    } else if (ForDevice && Opt.matches(options::OPT_Xarch__)) {
      // -Xarch_<arch> is only handled here for CUDA/HIP toolchains; other
      // toolchains may need their own special translation. The argument
      // survives only when <arch> is the architecture being compiled for.
      const bool ArchMismatch =
          BoundArch.empty() || A->getValue(0) != BoundArch;
      if (ArchMismatch)
        Discard = true;
      else
        Unwrap = true;
    }

    // Ordinary argument: copy it across and move on.
    if (!Unwrap && !Discard) {
      Result->append(A);
      continue;
    }

    Changed = true;
    if (Discard)
      continue;

    // The helper updates A in place (it takes the pointer by reference), so
    // the append below stores whatever A refers to after translation.
    TranslateXarchArgs(Args, A, Result, AllocatedArgs);
    Result->append(A);
  }

  // Nothing was rewritten: signal that with a null pointer so the caller
  // keeps using the original list.
  if (!Changed) {
    delete Result;
    return nullptr;
  }
  return Result;
}
6 changes: 0 additions & 6 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Expand Up @@ -800,12 +800,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
}

for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches BoundArch
if (BoundArch.empty() || A->getValue(0) != BoundArch)
continue;
TranslateXarchArgs(Args, A, DAL);
}
DAL->append(A);
}

Expand Down
6 changes: 0 additions & 6 deletions clang/lib/Driver/ToolChains/HIP.cpp
Expand Up @@ -378,12 +378,6 @@ HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
const OptTable &Opts = getDriver().getOpts();

for (Arg *A : Args) {
if (A->getOption().matches(options::OPT_Xarch__)) {
// Skip this argument unless the architecture matches BoundArch.
if (BoundArch.empty() || A->getValue(0) != BoundArch)
continue;
TranslateXarchArgs(Args, A, DAL);
}
DAL->append(A);
}

Expand Down
13 changes: 13 additions & 0 deletions clang/test/Driver/hip-options.hip
Expand Up @@ -13,3 +13,16 @@
// RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \
// RUN: FileCheck -check-prefix=MLLVM %s
// MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"

// RUN: %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
// RUN: -Xarch_device -fcf-protection=branch \
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEV %s
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
// DEV: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
// DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"

// RUN: %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
// RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=HOST %s
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
// HOST-NOT: clang{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
// HOST: clang{{.*}} "-debug-info-kind={{.*}}"

0 comments on commit 2ae2564

Please sign in to comment.