58 changes: 58 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,16 @@ AMDGPUMangledLibFunc::AMDGPUMangledLibFunc(
Leads[1] = copyFrom.Leads[1];
}

/// Build a mangled library function descriptor for \p id directly from the IR
/// signature \p FT. Only the leading two parameter types are captured (the
/// Leads array has two slots); any further parameters of \p FT are ignored.
/// \p SignedInts selects signed vs. unsigned mangling for integer parameters.
AMDGPUMangledLibFunc::AMDGPUMangledLibFunc(EFuncId id, FunctionType *FT,
                                           bool SignedInts) {
  FuncId = id;
  const unsigned ParamCount = FT->getNumParams();
  if (ParamCount > 0)
    Leads[0] = Param::getFromTy(FT->getParamType(0), SignedInts);
  if (ParamCount > 1)
    Leads[1] = Param::getFromTy(FT->getParamType(1), SignedInts);
}

///////////////////////////////////////////////////////////////////////////////
// Demangling

Expand Down Expand Up @@ -875,6 +885,50 @@ std::string AMDGPUMangledLibFunc::mangleNameItanium() const {
///////////////////////////////////////////////////////////////////////////////
// Misc

/// Translate an IR type into a mangler parameter descriptor.
///
/// A fixed vector type records its element count in the returned Param's
/// VectorSize and is then classified by its element type; scalar types keep
/// the default VectorSize. \p Signed selects the signed vs. unsigned argument
/// kind for integer types. Types with no corresponding libcall argument kind
/// (and integer widths other than 8/16/32/64) are unreachable by contract.
AMDGPULibFuncBase::Param AMDGPULibFuncBase::Param::getFromTy(Type *Ty,
                                                             bool Signed) {
  Param P;
  if (FixedVectorType *VT = dyn_cast<FixedVectorType>(Ty)) {
    P.VectorSize = VT->getNumElements();
    Ty = VT->getElementType();
  }

  switch (Ty->getTypeID()) {
  case Type::FloatTyID:
    P.ArgType = AMDGPULibFunc::F32;
    break;
  case Type::DoubleTyID:
    P.ArgType = AMDGPULibFunc::F64;
    break;
  case Type::HalfTyID:
    P.ArgType = AMDGPULibFunc::F16;
    break;
  case Type::IntegerTyID:
    switch (cast<IntegerType>(Ty)->getBitWidth()) {
    case 8:
      P.ArgType = Signed ? AMDGPULibFunc::I8 : AMDGPULibFunc::U8;
      break;
    case 16:
      P.ArgType = Signed ? AMDGPULibFunc::I16 : AMDGPULibFunc::U16;
      break;
    case 32:
      P.ArgType = Signed ? AMDGPULibFunc::I32 : AMDGPULibFunc::U32;
      break;
    case 64:
      P.ArgType = Signed ? AMDGPULibFunc::I64 : AMDGPULibFunc::U64;
      break;
    default:
      // Distinct from the outer default's message so a failure here pinpoints
      // an unsupported integer width rather than an unsupported type kind.
      llvm_unreachable("unhandled libcall argument integer bit width");
    }
    break;
  default:
    llvm_unreachable("unhandled libcall argument type");
  }

  return P;
}

static Type* getIntrinsicParamType(
LLVMContext& C,
const AMDGPULibFunc::Param& P,
Expand Down Expand Up @@ -1051,6 +1105,10 @@ AMDGPULibFunc::AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom) {
Id, *cast<AMDGPUMangledLibFunc>(CopyFrom.Impl.get())));
}

/// Construct a mangled library function with id \p Id whose leading argument
/// descriptors are derived from the IR signature \p FT.
AMDGPULibFunc::AMDGPULibFunc(EFuncId Id, FunctionType *FT, bool SignedInts) {
  Impl = std::make_unique<AMDGPUMangledLibFunc>(Id, FT, SignedInts);
}

/// Construct an unmangled library function named \p Name with signature \p FT.
AMDGPULibFunc::AMDGPULibFunc(StringRef Name, FunctionType *FT) {
  Impl = std::make_unique<AMDGPUUnmangledLibFunc>(Name, FT);
}
Expand Down
22 changes: 16 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class FunctionCallee;
class FunctionType;
class Function;
class Module;
class Type;

class AMDGPULibFuncBase {
public:
Expand Down Expand Up @@ -290,18 +291,23 @@ class AMDGPULibFuncBase {
};

struct Param {
unsigned char ArgType;
unsigned char VectorSize;
unsigned char PtrKind;
unsigned char ArgType = 0;
unsigned char VectorSize = 1;
unsigned char PtrKind = 0;

unsigned char Reserved;
unsigned char Reserved = 0;

void reset() {
ArgType = 0;
VectorSize = 1;
PtrKind = 0;
}
Param() { reset(); }

static Param getIntN(unsigned char NumElts) {
return Param{I32, NumElts, 0, 0};
}

static Param getFromTy(Type *Ty, bool Signed);

template <typename Stream>
void mangleItanium(Stream& os);
Expand Down Expand Up @@ -351,7 +357,7 @@ class AMDGPULibFuncImpl : public AMDGPULibFuncBase {
protected:
EFuncId FuncId;
std::string Name;
ENamePrefix FKind;
ENamePrefix FKind = NOPFX;
};

/// Wrapper class for AMDGPULIbFuncImpl
Expand All @@ -362,6 +368,8 @@ class AMDGPULibFunc : public AMDGPULibFuncBase {
/// Clone a mangled library func with the Id \p Id and argument info from \p
/// CopyFrom.
explicit AMDGPULibFunc(EFuncId Id, const AMDGPULibFunc &CopyFrom);
explicit AMDGPULibFunc(EFuncId Id, FunctionType *FT, bool SignedInts);

/// Construct an unmangled library function on the fly.
explicit AMDGPULibFunc(StringRef FName, FunctionType *FT);

Expand Down Expand Up @@ -415,6 +423,8 @@ class AMDGPUMangledLibFunc : public AMDGPULibFuncImpl {
explicit AMDGPUMangledLibFunc();
explicit AMDGPUMangledLibFunc(EFuncId id,
const AMDGPUMangledLibFunc &copyFrom);
explicit AMDGPUMangledLibFunc(EFuncId id, FunctionType *FT,
bool SignedInts = true);

std::string getName() const override;
unsigned getNumArgs() const override;
Expand Down
952 changes: 252 additions & 700 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Large diffs are not rendered by default.

517 changes: 333 additions & 184 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll

Large diffs are not rendered by default.

226 changes: 163 additions & 63 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll

Large diffs are not rendered by default.

71 changes: 40 additions & 31 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ declare <16 x half> @_Z4powrDv16_DhS_(<16 x half>, <16 x half>)
define float @test_powr_fast_f32(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_fast_f32
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @_Z4log2f(float [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast float [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @_Z4exp2f(float [[__YLOGX]])
; CHECK-NEXT: [[__EXP2:%.*]] = call fast float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]
;
%powr = tail call fast float @_Z4powrff(float %x, float %y)
Expand All @@ -37,9 +37,9 @@ define float @test_powr_fast_f32(float %x, float %y) {
define <2 x float> @test_powr_fast_v2f32(<2 x float> %x, <2 x float> %y) {
; CHECK-LABEL: define <2 x float> @test_powr_fast_v2f32
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[__LOG2:%.*]] = call fast <2 x float> @_Z4log2Dv2_f(<2 x float> [[X]])
; CHECK-NEXT: [[__LOG2:%.*]] = call fast <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul fast <2 x float> [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call fast <2 x float> @_Z4exp2Dv2_f(<2 x float> [[__YLOGX]])
; CHECK-NEXT: [[__EXP2:%.*]] = call fast <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;
%powr = tail call fast <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)
Expand Down Expand Up @@ -449,7 +449,7 @@ define float @test_powr_afn_f32_nnan_minsize(float %x, float %y) #0 {
define float @test_powr_afn_f32_noinline(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_afn_f32_noinline
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR3:[0-9]+]]
; CHECK-NEXT: [[POWR:%.*]] = tail call afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: ret float [[POWR]]
;
%powr = tail call afn float @_Z4powrff(float %x, float %y) #1
Expand All @@ -459,7 +459,7 @@ define float @test_powr_afn_f32_noinline(float %x, float %y) {
define float @test_powr_afn_f32_nnan_noinline(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_afn_f32_nnan_noinline
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR3]]
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan afn float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4]]
; CHECK-NEXT: ret float [[POWR]]
;
%powr = tail call afn nnan float @_Z4powrff(float %x, float %y) #1
Expand All @@ -479,7 +479,7 @@ define float @test_powr_afn_f32_strictfp(float %x, float %y) #2 {
define float @test_powr_fast_f32_nobuiltin(float %x, float %y) {
; CHECK-LABEL: define float @test_powr_fast_f32_nobuiltin
; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call fast float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[POWR:%.*]] = tail call fast float @_Z4powrff(float [[X]], float [[Y]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: ret float [[POWR]]
;
%powr = tail call fast float @_Z4powrff(float %x, float %y) #3
Expand Down Expand Up @@ -545,8 +545,7 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef
; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float 0.000000e+00, float poison, float 0.000000e+00>)
; CHECK-NEXT: ret <3 x float> [[POWR]]
; CHECK-NEXT: ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
;
%powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float 0.0, float poison, float 0.0>)
ret <3 x float> %powr
Expand All @@ -555,8 +554,7 @@ define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x flo
define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef
; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float -0.000000e+00, float poison, float -0.000000e+00>)
; CHECK-NEXT: ret <3 x float> [[POWR]]
; CHECK-NEXT: ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
;
%powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -0.0, float poison, float -0.0>)
ret <3 x float> %powr
Expand Down Expand Up @@ -615,8 +613,8 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_0.5(<2 x float> %x) {
define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef
; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float 5.000000e-01, float poison, float 5.000000e-01>)
; CHECK-NEXT: ret <3 x float> [[POWR]]
; CHECK-NEXT: [[__POW2SQRT:%.*]] = call afn <3 x float> @_Z4sqrtDv3_f(<3 x float> [[X]])
; CHECK-NEXT: ret <3 x float> [[__POW2SQRT]]
;
%powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float 0.5, float poison, float 0.5>)
ret <3 x float> %powr
Expand All @@ -625,8 +623,8 @@ define <3 x float> @test_powr_afn_v3f32_0.5_splat_undef(<3 x float> %x, <3 x flo
define <3 x float> @test_powr_afn_v3f32_neg0.5_splat_undef(<3 x float> %x, <3 x float> %y) {
; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.5_splat_undef
; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float -5.000000e-01, float poison, float -5.000000e-01>)
; CHECK-NEXT: ret <3 x float> [[POWR]]
; CHECK-NEXT: [[__POW2RSQRT:%.*]] = call afn <3 x float> @_Z5rsqrtDv3_f(<3 x float> [[X]])
; CHECK-NEXT: ret <3 x float> [[__POW2RSQRT]]
;
%powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -0.5, float poison, float -0.5>)
ret <3 x float> %powr
Expand Down Expand Up @@ -683,8 +681,7 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_1.0(<2 x float> %x) {
define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef
; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float 1.000000e+00, float poison, float 1.000000e+00>)
; CHECK-NEXT: ret <3 x float> [[POWR]]
; CHECK-NEXT: ret <3 x float> [[X]]
;
%powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float 1.0, float poison, float 1.0>)
ret <3 x float> %powr
Expand All @@ -693,8 +690,8 @@ define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x flo
define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef
; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> [[X]], <3 x float> <float -1.000000e+00, float poison, float -1.000000e+00>)
; CHECK-NEXT: ret <3 x float> [[POWR]]
; CHECK-NEXT: [[__POWRECIP:%.*]] = fdiv afn <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
; CHECK-NEXT: ret <3 x float> [[__POWRECIP]]
;
%powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -1.0, float poison, float -1.0>)
ret <3 x float> %powr
Expand Down Expand Up @@ -1013,8 +1010,10 @@ define float @test_powr_afn_f32_nnan_x_known_positive(float nofpclass(ninf nnorm
define float @test_powr_afn_f32_nnan_ninf_x_known_positive(float nofpclass(ninf nnorm nsub) %x, float %y) {
; CHECK-LABEL: define float @test_powr_afn_f32_nnan_ninf_x_known_positive
; CHECK-SAME: (float nofpclass(ninf nsub nnorm) [[X:%.*]], float [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan ninf afn float @_Z4powrff(float [[X]], float [[Y]])
; CHECK-NEXT: ret float [[POWR]]
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]
;
%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y)
ret float %powr
Expand All @@ -1033,8 +1032,10 @@ define <2 x float> @test_powr_afn_v2f32_nnan_x_known_positive(<2 x float> nofpcl
define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive(<2 x float> nofpclass(ninf nnorm nsub) %x, <2 x float> %y) {
; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_nnan_ninf_x_known_positive
; CHECK-SAME: (<2 x float> nofpclass(ninf nsub nnorm) [[X:%.*]], <2 x float> [[Y:%.*]]) {
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan ninf afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y]])
; CHECK-NEXT: ret <2 x float> [[POWR]]
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;
%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y)
ret <2 x float> %powr
Expand Down Expand Up @@ -1108,8 +1109,10 @@ define float @test_powr_afn_nnan_ninf_f32_known_integral_sitofp(float %x, i32 %y
; CHECK-LABEL: define float @test_powr_afn_nnan_ninf_f32_known_integral_sitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp i32 [[Y]] to float
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan ninf afn float @_Z4powrff(float [[X]], float [[Y_CAST]])
; CHECK-NEXT: ret float [[POWR]]
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]
;
%y.cast = sitofp i32 %y to float
%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y.cast)
Expand Down Expand Up @@ -1144,8 +1147,10 @@ define float @test_powr_afn_nnan_ninf_f32_known_integral_uitofp(float %x, i32 %y
; CHECK-LABEL: define float @test_powr_afn_nnan_ninf_f32_known_integral_uitofp
; CHECK-SAME: (float [[X:%.*]], i32 [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp i32 [[Y]] to float
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan ninf afn float @_Z4powrff(float [[X]], float [[Y_CAST]])
; CHECK-NEXT: ret float [[POWR]]
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn float @llvm.log2.f32(float [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn float [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn float @llvm.exp2.f32(float [[__YLOGX]])
; CHECK-NEXT: ret float [[__EXP2]]
;
%y.cast = uitofp i32 %y to float
%powr = tail call afn nnan ninf float @_Z4powrff(float %x, float %y.cast)
Expand All @@ -1156,8 +1161,10 @@ define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x flo
; CHECK-LABEL: define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_sitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan ninf afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y_CAST]])
; CHECK-NEXT: ret <2 x float> [[POWR]]
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;
%y.cast = sitofp <2 x i32> %y to <2 x float>
%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
Expand Down Expand Up @@ -1192,8 +1199,10 @@ define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x flo
; CHECK-LABEL: define <2 x float> @test_powr_afn_nnan_ninf_v2f32_known_integral_uitofp
; CHECK-SAME: (<2 x float> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
; CHECK-NEXT: [[Y_CAST:%.*]] = uitofp <2 x i32> [[Y]] to <2 x float>
; CHECK-NEXT: [[POWR:%.*]] = tail call nnan ninf afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> [[Y_CAST]])
; CHECK-NEXT: ret <2 x float> [[POWR]]
; CHECK-NEXT: [[__LOG2:%.*]] = call nnan ninf afn <2 x float> @llvm.log2.v2f32(<2 x float> [[X]])
; CHECK-NEXT: [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[Y_CAST]]
; CHECK-NEXT: [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
; CHECK-NEXT: ret <2 x float> [[__EXP2]]
;
%y.cast = uitofp <2 x i32> %y to <2 x float>
%powr = tail call afn nnan ninf <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> %y.cast)
Expand Down
107 changes: 50 additions & 57 deletions llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -351,10 +351,10 @@ declare half @_Z4pownDhi(half, i32)

; GCN-LABEL: {{^}}define half @test_pown_f16(
; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN-NATIVE: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)
; GCN-NATIVE: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
; GCN-NATIVE: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)
; GCN-NATIVE: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN-NATIVE: %__ytou = trunc i32 %y to i16
; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
; GCN-NATIVE: %0 = bitcast half %x to i16
Expand All @@ -371,16 +371,15 @@ entry:
declare float @_Z4pownfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 1.013000e+03)
; GCN-PRELINK: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: store i32 %[[r2]], ptr addrspace(1) %a, align 4
; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %[[r0]], -2147483648
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:
%tmp = load float, ptr addrspace(1) %a, align 4
Expand All @@ -390,15 +389,10 @@ entry:
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN-POSTLINK: call fast float @_Z4powrff(float %tmp, float %tmp1)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %tmp)
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: store float %__exp2, ptr addrspace(1) %a, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, ptr addrspace(1) %a, align 4
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_powr(ptr addrspace(1) nocapture %a) {
entry:
%tmp = load float, ptr addrspace(1) %a, align 4
Expand All @@ -410,19 +404,18 @@ entry:
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN-POSTLINK: call fast float @_Z4pownfi(float %tmp, i32 %conv)
; GCN-PRELINK: %conv = fptosi float %tmp1 to i32
; GCN-PRELINK: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN-PRELINK: %__log2 = tail call fast float @_Z4log2f(float %__fabs)
; GCN-PRELINK: %pownI2F = sitofp i32 %conv to float
; GCN-PRELINK: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN-PRELINK: %__exp2 = tail call fast float @_Z4exp2f(float %__ylogx)
; GCN-PRELINK: %__yeven = shl i32 %conv, 31
; GCN-PRELINK: %[[r0:.*]] = bitcast float %tmp to i32
; GCN-PRELINK: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN-PRELINK: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN-PRELINK: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN-PRELINK: store i32 %[[r2]], ptr addrspace(1) %a, align 4
; GCN: %conv = fptosi float %tmp1 to i32
; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN: %pownI2F = sitofp i32 %conv to float
; GCN: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %__yeven = shl i32 %conv, 31
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN: %[[r2:.*]] = or i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:
%tmp = load float, ptr addrspace(1) %a, align 4
Expand All @@ -438,30 +431,30 @@ declare half @_Z3powDhDh(half, half)
declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)

; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)
; GCN-PRELINK: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN-PRELINK: %__log2 = tail call fast half @_Z4log2Dh(half %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast half %__log2, 0xH4A80
; GCN-PRELINK: %__exp2 = tail call fast half @_Z4exp2Dh(half %__ylogx)
; GCN-PRELINK: %1 = bitcast half %x to i16
; GCN-PRELINK: %__pow_sign = and i16 %1, -32768
; GCN-PRELINK: %2 = bitcast half %__exp2 to i16
; GCN-PRELINK: %3 = or i16 %__pow_sign, %2
; GCN-PRELINK: %4 = bitcast i16 %3 to half
; GCN: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN: %__ylogx = fmul fast half %__log2, 0xH4A80
; GCN: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN: %1 = bitcast half %x to i16
; GCN: %__pow_sign = and i16 %1, -32768
; GCN: %2 = bitcast half %__exp2 to i16
; GCN: %3 = or i16 %__pow_sign, %2
; GCN: %4 = bitcast i16 %3 to half
define half @test_pow_fast_f16__y_13(half %x) {
%powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
ret half %powr
}

; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)
; GCN-PRELINK: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
; GCN-PRELINK: %__log2 = tail call fast <2 x half> @_Z4log2Dv2_Dh(<2 x half> %__fabs)
; GCN-PRELINK: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80>
; GCN-PRELINK: %__exp2 = tail call fast <2 x half> @_Z4exp2Dv2_Dh(<2 x half> %__ylogx)
; GCN-PRELINK: %1 = bitcast <2 x half> %x to <2 x i16>
; GCN-PRELINK: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
; GCN-PRELINK: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
; GCN-PRELINK: %3 = or <2 x i16> %__pow_sign, %2
; GCN-PRELINK: %4 = bitcast <2 x i16> %3 to <2 x half>
; GCN: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
; GCN: %__log2 = tail call fast <2 x half> @llvm.log2.v2f16(<2 x half> %__fabs)
; GCN: %__ylogx = fmul fast <2 x half> %__log2, <half 0xH4A80, half 0xH4A80>
; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx)
; GCN: %1 = bitcast <2 x half> %x to <2 x i16>
; GCN: %__pow_sign = and <2 x i16> %1, <i16 -32768, i16 -32768>
; GCN: %2 = bitcast <2 x half> %__exp2 to <2 x i16>
; GCN: %3 = or <2 x i16> %__pow_sign, %2
; GCN: %4 = bitcast <2 x i16> %3 to <2 x half>
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
%powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
ret <2 x half> %powr
Expand Down Expand Up @@ -673,11 +666,11 @@ entry:
declare float @_Z5log10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN-NATIVE: %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
; GCN-NATIVE: %__log2 = tail call fast float @_Z11native_log2f(float %tmp)
; GCN-NATIVE: %__ylogx = fmul fast float %__log2, %tmp1
; GCN-NATIVE: %__exp2 = tail call fast float @_Z11native_exp2f(float %__ylogx)
; GCN-NATIVE: store float %__exp2, ptr addrspace(1) %a, align 4
; GCN: %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_use_native_powr(ptr addrspace(1) nocapture %a) {
entry:
%tmp = load float, ptr addrspace(1) %a, align 4
Expand Down