Skip to content

Commit

Permalink
[AMDGPU] Set fast-math flags on functions given the options
Browse files Browse the repository at this point in the history
We have a single library build, compiled without relaxation options.
When inlined, library functions remove fast-math attributes
from the functions they are integrated into.

This patch sets relaxation attributes on the functions after
linking, provided the corresponding relaxation options are given.
Math instructions inside the inlined functions still have
no fast flags, but inlining no longer prevents fast-math
transformations of the surrounding caller code.

Differential Revision: https://reviews.llvm.org/D38325

llvm-svn: 314568
  • Loading branch information
rampitec committed Sep 29, 2017
1 parent b33607e commit 1d8cf2b
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 7 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class ModulePass;
class Pass;
class Target;
class TargetMachine;
class TargetOptions;
class PassRegistry;
class Module;

Expand Down Expand Up @@ -52,7 +53,7 @@ FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIFixWWMLivenessPass();
FunctionPass *createAMDGPUSimplifyLibCallsPass();
FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetOptions &);
FunctionPass *createAMDGPUUseNativeCallsPass();
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
Expand Down
33 changes: 30 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <vector>
#include <cmath>

Expand Down Expand Up @@ -168,10 +169,13 @@ namespace {

AMDGPULibCalls Simplifier;

const TargetOptions Options;

public:
static char ID; // Pass identification

AMDGPUSimplifyLibCalls() : FunctionPass(ID) {
AMDGPUSimplifyLibCalls(const TargetOptions &Opt = TargetOptions())
: FunctionPass(ID), Options(Opt) {
initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}

Expand Down Expand Up @@ -1680,14 +1684,34 @@ bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
}

// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass() {
return new AMDGPUSimplifyLibCalls();
FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetOptions &Opt) {
return new AMDGPUSimplifyLibCalls(Opt);
}

// Public interface to the Use Native Calls pass: returns a freshly
// heap-allocated AMDGPUUseNativeCalls instance as a FunctionPass pointer.
FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
return new AMDGPUUseNativeCalls();
}

/// Attach function-level fast-math string attributes to \p F according to
/// the floating-point relaxation flags carried by \p Options.
///
/// UnsafeFPMath implies both the no-infs and no-nans attributes and
/// additionally requests "less-precise-fpmad" and "unsafe-fp-math".
/// NoInfsFPMath and NoNaNsFPMath each request only their own attribute.
///
/// \returns true if at least one attribute was requested and applied to
/// \p F; false if no relaxation flag is set, in which case \p F is left
/// untouched.
static bool setFastFlags(Function &F, const TargetOptions &Options) {
  const bool Unsafe = Options.UnsafeFPMath;
  const bool AssumeNoInfs = Unsafe || Options.NoInfsFPMath;
  const bool AssumeNoNaNs = Unsafe || Options.NoNaNsFPMath;

  AttrBuilder FastAttrs;
  if (AssumeNoInfs)
    FastAttrs.addAttribute("no-infs-fp-math", "true");
  if (AssumeNoNaNs)
    FastAttrs.addAttribute("no-nans-fp-math", "true");
  if (Unsafe) {
    FastAttrs.addAttribute("less-precise-fpmad", "true");
    FastAttrs.addAttribute("unsafe-fp-math", "true");
  }

  // Only touch the function when there is something to add.
  if (FastAttrs.hasAttributes()) {
    F.addAttributes(AttributeList::FunctionIndex, FastAttrs);
    return true;
  }

  return false;
}

bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
Expand All @@ -1699,6 +1723,9 @@ bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
F.printAsOperand(dbgs(), false, F.getParent());
dbgs() << '\n';);

if (!EnablePreLink)
Changed |= setFastFlags(F, Options);

for (auto &BB : F) {
for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
// Ignore non-calls.
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,17 +370,18 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
PM.add(createAMDGPUAlwaysInlinePass(false));
});

const auto &Opt = Options;
Builder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
[AMDGPUAA, LibCallSimplify](const PassManagerBuilder &,
legacy::PassManagerBase &PM) {
[AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
legacy::PassManagerBase &PM) {
if (AMDGPUAA) {
PM.add(createAMDGPUAAWrapperPass());
PM.add(createAMDGPUExternalAAWrapperPass());
}
PM.add(llvm::createAMDGPUUseNativeCallsPass());
if (LibCallSimplify)
PM.add(llvm::createAMDGPUSimplifyLibCallsPass());
PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
});

Builder.addExtension(
Expand Down
33 changes: 33 additions & 0 deletions llvm/test/CodeGen/AMDGPU/inline-attr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-unsafe-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=UNSAFE %s
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-nans-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NONANS %s
; RUN: opt -mtriple=amdgcn--amdhsa -S -O3 -enable-no-infs-fp-math %s | FileCheck -check-prefix=GCN -check-prefix=NOINFS %s

; GCN: define float @foo(float %x) local_unnamed_addr #0 {
; GCN: define amdgpu_kernel void @caller(float addrspace(1)* nocapture %p) local_unnamed_addr #1 {
; GCN: %mul.i = fmul float %load, 1.500000e+01

; UNSAFE: attributes #0 = { norecurse nounwind readnone "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
; UNSAFE: attributes #1 = { norecurse nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }

; NOINFS: attributes #0 = { norecurse nounwind readnone "no-infs-fp-math"="true" }
; NOINFS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }

; NONANS: attributes #0 = { norecurse nounwind readnone "no-nans-fp-math"="true" }
; NONANS: attributes #1 = { norecurse nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }

; Callee: returns its argument multiplied by 15.0. The fmul itself carries no
; fast-math flags.
define float @foo(float %x) #0 {
entry:
%mul = fmul float %x, 1.500000e+01
ret float %mul
}

; Kernel: loads a float from %p, passes it to @foo through a call marked
; `fast`, and stores the result back to %p.
define amdgpu_kernel void @caller(float addrspace(1)* %p) #1 {
entry:
%load = load float, float addrspace(1)* %p, align 4
%call = call fast float @foo(float %load) #0
store float %call, float addrspace(1)* %p, align 4
ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }

0 comments on commit 1d8cf2b

Please sign in to comment.