diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index c70f48af33cf2..e86abb7203f2b 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -38,6 +38,13 @@ static cl::opt<bool> EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden, cl::desc("Enable reciprocal sqrt optimization")); +// FIXME: This is a workaround to recover lost performance from #155024. +// We still need to investigate the regression and find a more permanent +// solution. +static cl::opt<bool> EnableMADWide("nvptx-mad-wide-opt", cl::init(false), + cl::Hidden, + cl::desc("Enable MAD wide optimization")); + /// createNVPTXISelDag - This pass converts a legalized DAG into a /// NVPTX-specific DAG, ready for instruction scheduling. FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM, @@ -84,6 +91,8 @@ bool NVPTXDAGToDAGISel::allowFMA() const { bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; } +bool NVPTXDAGToDAGISel::doMADWideOpt() const { return EnableMADWide; } + /// Select - Select instructions not customized! Used for /// expanded, promoted and normal instructions. 
void NVPTXDAGToDAGISel::Select(SDNode *N) { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 8dcd5362c4512..c912e709d0aa0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -45,6 +45,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel { bool useF32FTZ() const; bool allowFMA() const; bool doRsqrtOpt() const; + bool doMADWideOpt() const; NVPTXScopes Scopes{}; diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 4e38e026e6bda..4e873558b2537 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -114,6 +114,7 @@ def hasArchAccelFeatures : Predicate<"Subtarget->hasArchAccelFeatures()">; def doF32FTZ : Predicate<"useF32FTZ()">; def doNoF32FTZ : Predicate<"!useF32FTZ()">; def doRsqrtOpt : Predicate<"doRsqrtOpt()">; +def doMADWideOpt : Predicate<"doMADWideOpt()">; def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">; def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">; @@ -899,8 +900,16 @@ let Predicates = [hasOptEnabled] in { defm MAD_LO_S32 : MADInst<"lo.s32", mul, I32RT, I32RT>; defm MAD_LO_S64 : MADInst<"lo.s64", mul, I64RT, I64RT>; - // Generating mad.wide causes a regression: + // Generating mad.wide causes a regression in some cases: // https://github.com/llvm/llvm-project/pull/150477#issuecomment-3191367837 + // Only do so when the user requests it (-nvptx-mad-wide-opt). A nested + // `let` replaces the outer Predicates list, so hasOptEnabled is repeated. 
+ let Predicates = [hasOptEnabled, doMADWideOpt] in { + defm MAD_WIDE_U16 : MADInst<"wide.u16", umul_wide, I32RT, I16RT>; + defm MAD_WIDE_S16 : MADInst<"wide.s16", smul_wide, I32RT, I16RT>; + defm MAD_WIDE_U32 : MADInst<"wide.u32", umul_wide, I64RT, I32RT>; + defm MAD_WIDE_S32 : MADInst<"wide.s32", smul_wide, I64RT, I32RT>; + } } //----------------------------------- diff --git a/llvm/test/CodeGen/NVPTX/combine-wide.ll b/llvm/test/CodeGen/NVPTX/combine-wide.ll index b5948d37c3505..63e0f3789f49f 100644 --- a/llvm/test/CodeGen/NVPTX/combine-wide.ll +++ b/llvm/test/CodeGen/NVPTX/combine-wide.ll @@ -1,24 +1,37 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -O1 | FileCheck %s --check-prefixes=CHECK,O1 +; RUN: llc < %s -O1 | FileCheck %s --check-prefixes=CHECK,O1,O1-NO-MAD +; RUN: llc < %s -O1 -nvptx-mad-wide-opt | FileCheck %s --check-prefixes=CHECK,O1,O1-MAD ; RUN: llc < %s -O0 | FileCheck %s --check-prefixes=CHECK,O0 target triple = "nvptx64-nvidia-cuda" define i64 @t1(i32 %a, i32 %b, i64 %c) { -; -; O1-LABEL: t1( -; O1: { -; O1-NEXT: .reg .b32 %r<3>; -; O1-NEXT: .reg .b64 %rd<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b32 %r1, [t1_param_0]; -; O1-NEXT: ld.param.b32 %r2, [t1_param_1]; -; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2; -; O1-NEXT: ld.param.b64 %rd2, [t1_param_2]; -; O1-NEXT: add.s64 %rd3, %rd2, %rd1; -; O1-NEXT: st.param.b64 [func_retval0], %rd3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t1( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b32 %r<3>; +; O1-NO-MAD-NEXT: .reg .b64 %rd<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t1_param_0]; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t1_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, %r2; +; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t1_param_2]; +; O1-NO-MAD-NEXT: add.s64 %rd3, %rd2, %rd1; +; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t1( +; O1-MAD: { +; 
O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-NEXT: .reg .b64 %rd<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b32 %r1, [t1_param_0]; +; O1-MAD-NEXT: ld.param.b32 %r2, [t1_param_1]; +; O1-MAD-NEXT: ld.param.b64 %rd1, [t1_param_2]; +; O1-MAD-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1; +; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t1( ; O0: { @@ -41,20 +54,32 @@ define i64 @t1(i32 %a, i32 %b, i64 %c) { } define i64 @t2(i32 %a, i32 %b, i64 %c) { -; -; O1-LABEL: t2( -; O1: { -; O1-NEXT: .reg .b32 %r<3>; -; O1-NEXT: .reg .b64 %rd<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b32 %r1, [t2_param_0]; -; O1-NEXT: ld.param.b32 %r2, [t2_param_1]; -; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2; -; O1-NEXT: ld.param.b64 %rd2, [t2_param_2]; -; O1-NEXT: add.s64 %rd3, %rd1, %rd2; -; O1-NEXT: st.param.b64 [func_retval0], %rd3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t2( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b32 %r<3>; +; O1-NO-MAD-NEXT: .reg .b64 %rd<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t2_param_0]; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t2_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, %r2; +; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t2_param_2]; +; O1-NO-MAD-NEXT: add.s64 %rd3, %rd1, %rd2; +; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t2( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-NEXT: .reg .b64 %rd<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b32 %r1, [t2_param_0]; +; O1-MAD-NEXT: ld.param.b32 %r2, [t2_param_1]; +; O1-MAD-NEXT: ld.param.b64 %rd1, [t2_param_2]; +; O1-MAD-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1; +; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t2( ; O0: { @@ -77,19 +102,30 @@ define i64 @t2(i32 %a, i32 %b, i64 %c) { } define i64 @t3(i32 %a, i32 %b) { -; -; O1-LABEL: t3( -; O1: { -; O1-NEXT: .reg .b32 %r<3>; -; O1-NEXT: 
.reg .b64 %rd<3>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b32 %r1, [t3_param_0]; -; O1-NEXT: ld.param.b32 %r2, [t3_param_1]; -; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2; -; O1-NEXT: add.s64 %rd2, %rd1, 1; -; O1-NEXT: st.param.b64 [func_retval0], %rd2; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t3( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b32 %r<3>; +; O1-NO-MAD-NEXT: .reg .b64 %rd<3>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t3_param_0]; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t3_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, %r2; +; O1-NO-MAD-NEXT: add.s64 %rd2, %rd1, 1; +; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t3( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-NEXT: .reg .b64 %rd<2>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b32 %r1, [t3_param_0]; +; O1-MAD-NEXT: ld.param.b32 %r2, [t3_param_1]; +; O1-MAD-NEXT: mad.wide.s32 %rd1, %r1, %r2, 1; +; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t3( ; O0: { @@ -111,19 +147,30 @@ define i64 @t3(i32 %a, i32 %b) { } define i64 @t4(i32 %a, i64 %c) { -; -; O1-LABEL: t4( -; O1: { -; O1-NEXT: .reg .b32 %r<2>; -; O1-NEXT: .reg .b64 %rd<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b32 %r1, [t4_param_0]; -; O1-NEXT: ld.param.b64 %rd1, [t4_param_1]; -; O1-NEXT: mul.wide.s32 %rd2, %r1, 3; -; O1-NEXT: add.s64 %rd3, %rd1, %rd2; -; O1-NEXT: st.param.b64 [func_retval0], %rd3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t4( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b32 %r<2>; +; O1-NO-MAD-NEXT: .reg .b64 %rd<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t4_param_0]; +; O1-NO-MAD-NEXT: ld.param.b64 %rd1, [t4_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s32 %rd2, %r1, 3; +; O1-NO-MAD-NEXT: add.s64 %rd3, %rd1, %rd2; +; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: 
t4( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b32 %r<2>; +; O1-MAD-NEXT: .reg .b64 %rd<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b32 %r1, [t4_param_0]; +; O1-MAD-NEXT: ld.param.b64 %rd1, [t4_param_1]; +; O1-MAD-NEXT: mad.wide.s32 %rd2, %r1, 3, %rd1; +; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t4( ; O0: { @@ -145,18 +192,28 @@ define i64 @t4(i32 %a, i64 %c) { } define i64 @t4_1(i32 %a, i64 %c) { -; -; O1-LABEL: t4_1( -; O1: { -; O1-NEXT: .reg .b32 %r<2>; -; O1-NEXT: .reg .b64 %rd<3>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b32 %r1, [t4_1_param_0]; -; O1-NEXT: mul.wide.s32 %rd1, %r1, 3; -; O1-NEXT: add.s64 %rd2, %rd1, 5; -; O1-NEXT: st.param.b64 [func_retval0], %rd2; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t4_1( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b32 %r<2>; +; O1-NO-MAD-NEXT: .reg .b64 %rd<3>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t4_1_param_0]; +; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, 3; +; O1-NO-MAD-NEXT: add.s64 %rd2, %rd1, 5; +; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t4_1( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b32 %r<2>; +; O1-MAD-NEXT: .reg .b64 %rd<2>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b32 %r1, [t4_1_param_0]; +; O1-MAD-NEXT: mad.wide.s32 %rd1, %r1, 3, 5; +; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t4_1( ; O0: { @@ -177,20 +234,32 @@ define i64 @t4_1(i32 %a, i64 %c) { } define i64 @t5(i32 %a, i32 %b, i64 %c) { -; -; O1-LABEL: t5( -; O1: { -; O1-NEXT: .reg .b32 %r<3>; -; O1-NEXT: .reg .b64 %rd<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b32 %r1, [t5_param_0]; -; O1-NEXT: ld.param.b32 %r2, [t5_param_1]; -; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2; -; O1-NEXT: ld.param.b64 %rd2, [t5_param_2]; -; O1-NEXT: add.s64 %rd3, %rd2, %rd1; -; O1-NEXT: st.param.b64 [func_retval0], %rd3; -; O1-NEXT: ret; 
+; O1-NO-MAD-LABEL: t5( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b32 %r<3>; +; O1-NO-MAD-NEXT: .reg .b64 %rd<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t5_param_0]; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t5_param_1]; +; O1-NO-MAD-NEXT: mul.wide.u32 %rd1, %r1, %r2; +; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t5_param_2]; +; O1-NO-MAD-NEXT: add.s64 %rd3, %rd2, %rd1; +; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t5( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-NEXT: .reg .b64 %rd<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b32 %r1, [t5_param_0]; +; O1-MAD-NEXT: ld.param.b32 %r2, [t5_param_1]; +; O1-MAD-NEXT: ld.param.b64 %rd1, [t5_param_2]; +; O1-MAD-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1; +; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t5( ; O0: { @@ -213,20 +282,32 @@ define i64 @t5(i32 %a, i32 %b, i64 %c) { } define i64 @t6(i32 %a, i32 %b, i64 %c) { -; -; O1-LABEL: t6( -; O1: { -; O1-NEXT: .reg .b32 %r<3>; -; O1-NEXT: .reg .b64 %rd<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b32 %r1, [t6_param_0]; -; O1-NEXT: ld.param.b32 %r2, [t6_param_1]; -; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2; -; O1-NEXT: ld.param.b64 %rd2, [t6_param_2]; -; O1-NEXT: add.s64 %rd3, %rd1, %rd2; -; O1-NEXT: st.param.b64 [func_retval0], %rd3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t6( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b32 %r<3>; +; O1-NO-MAD-NEXT: .reg .b64 %rd<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t6_param_0]; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t6_param_1]; +; O1-NO-MAD-NEXT: mul.wide.u32 %rd1, %r1, %r2; +; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t6_param_2]; +; O1-NO-MAD-NEXT: add.s64 %rd3, %rd1, %rd2; +; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t6( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b32 %r<3>; +; 
O1-MAD-NEXT: .reg .b64 %rd<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b32 %r1, [t6_param_0]; +; O1-MAD-NEXT: ld.param.b32 %r2, [t6_param_1]; +; O1-MAD-NEXT: ld.param.b64 %rd1, [t6_param_2]; +; O1-MAD-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1; +; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t6( ; O0: { @@ -249,7 +330,6 @@ define i64 @t6(i32 %a, i32 %b, i64 %c) { } define i32 @t7(i16 %a, i16 %b) { -; ; O1-LABEL: t7( ; O1: { ; O1-NEXT: .reg .b16 %rs<4>; @@ -281,7 +361,6 @@ define i32 @t7(i16 %a, i16 %b) { } define i32 @t8(i16 %a, i16 %b) { -; ; O1-LABEL: t8( ; O1: { ; O1-NEXT: .reg .b16 %rs<4>; @@ -313,7 +392,6 @@ define i32 @t8(i16 %a, i16 %b) { } define i64 @t9(i32 %a, i32 %b) { -; ; O1-LABEL: t9( ; O1: { ; O1-NEXT: .reg .b32 %r<4>; @@ -345,7 +423,6 @@ define i64 @t9(i32 %a, i32 %b) { } define i64 @t10(i32 %a, i32 %b) { -; ; O1-LABEL: t10( ; O1: { ; O1-NEXT: .reg .b32 %r<4>; @@ -377,7 +454,6 @@ define i64 @t10(i32 %a, i32 %b) { } define i32 @t11(i16 %a, i16 %b) { -; ; O1-LABEL: t11( ; O1: { ; O1-NEXT: .reg .b16 %rs<4>; @@ -409,7 +485,6 @@ define i32 @t11(i16 %a, i16 %b) { } define i32 @t12(i16 %a, i16 %b) { -; ; O1-LABEL: t12( ; O1: { ; O1-NEXT: .reg .b16 %rs<3>; @@ -440,7 +515,6 @@ define i32 @t12(i16 %a, i16 %b) { } define i64 @t13(i32 %a, i32 %b) { -; ; O1-LABEL: t13( ; O1: { ; O1-NEXT: .reg .b32 %r<4>; @@ -472,7 +546,6 @@ define i64 @t13(i32 %a, i32 %b) { } define i64 @t14(i32 %a, i32 %b) { -; ; O1-LABEL: t14( ; O1: { ; O1-NEXT: .reg .b32 %r<3>; @@ -503,7 +576,6 @@ define i64 @t14(i32 %a, i32 %b) { } define i32 @t15(i16 %a, i16 %b) { -; ; O1-LABEL: t15( ; O1: { ; O1-NEXT: .reg .b16 %rs<3>; @@ -534,7 +606,6 @@ define i32 @t15(i16 %a, i16 %b) { } define i32 @t16(i16 %a, i16 %b) { -; ; O1-LABEL: t16( ; O1: { ; O1-NEXT: .reg .b16 %rs<4>; @@ -566,7 +637,6 @@ define i32 @t16(i16 %a, i16 %b) { } define i64 @t17(i32 %a, i32 %b) { -; ; O1-LABEL: t17( ; O1: { ; O1-NEXT: .reg .b32 %r<3>; @@ -597,7 +667,6 @@ 
define i64 @t17(i32 %a, i32 %b) { } define i64 @t18(i32 %a, i32 %b) { -; ; O1-LABEL: t18( ; O1: { ; O1-NEXT: .reg .b32 %r<4>; @@ -629,7 +698,6 @@ define i64 @t18(i32 %a, i32 %b) { } define i32 @t19(i16 %a, i16 %b) { -; ; O1-LABEL: t19( ; O1: { ; O1-NEXT: .reg .b16 %rs<4>; @@ -661,7 +729,6 @@ define i32 @t19(i16 %a, i16 %b) { } define i32 @t20(i16 %a) { -; ; CHECK-LABEL: t20( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; @@ -679,7 +746,6 @@ define i32 @t20(i16 %a) { } define i64 @t21(i32 %a) { -; ; CHECK-LABEL: t21( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -697,7 +763,6 @@ define i64 @t21(i32 %a) { } define i64 @t22(i32 %a) { -; ; CHECK-LABEL: t22( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -715,7 +780,6 @@ define i64 @t22(i32 %a) { } define i32 @t23(i16 %a, i16 %b) { -; ; CHECK-LABEL: t23( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; @@ -733,7 +797,6 @@ define i32 @t23(i16 %a, i16 %b) { } define i32 @t24(i16 %a, i16 %b) { -; ; O1-LABEL: t24( ; O1: { ; O1-NEXT: .reg .b16 %rs<2>; @@ -762,7 +825,6 @@ define i32 @t24(i16 %a, i16 %b) { } define i64 @t25(i32 %a) { -; ; CHECK-LABEL: t25( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -780,7 +842,6 @@ define i64 @t25(i32 %a) { } define i64 @t26(i32 %a) { -; ; O1-LABEL: t26( ; O1: { ; O1-NEXT: .reg .b32 %r<2>; @@ -809,7 +870,6 @@ define i64 @t26(i32 %a) { } define i32 @t27(i16 %a, i16 %b) { -; ; O1-LABEL: t27( ; O1: { ; O1-NEXT: .reg .b16 %rs<2>; @@ -838,7 +898,6 @@ define i32 @t27(i16 %a, i16 %b) { } define i32 @t28(i16 %a, i16 %b) { -; ; CHECK-LABEL: t28( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; @@ -856,7 +915,6 @@ define i32 @t28(i16 %a, i16 %b) { } define i64 @t29(i32 %a) { -; ; O1-LABEL: t29( ; O1: { ; O1-NEXT: .reg .b32 %r<2>; @@ -885,7 +943,6 @@ define i64 @t29(i32 %a) { } define i64 @t30(i32 %a) { -; ; CHECK-LABEL: t30( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<3>; @@ -903,7 +960,6 @@ define i64 @t30(i32 %a) { } define i64 @t31(i32 %a, i32 %b) { -; ; O1-LABEL: t31( ; O1: { ; O1-NEXT: .reg .b32 %r<4>; @@ -935,20 +991,32 
@@ define i64 @t31(i32 %a, i32 %b) { } define i32 @t32(i16 %a, i16 %b, i32 %c) { -; -; O1-LABEL: t32( -; O1: { -; O1-NEXT: .reg .b16 %rs<3>; -; O1-NEXT: .reg .b32 %r<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b16 %rs1, [t32_param_0]; -; O1-NEXT: ld.param.b16 %rs2, [t32_param_1]; -; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2; -; O1-NEXT: ld.param.b32 %r2, [t32_param_2]; -; O1-NEXT: add.s32 %r3, %r2, %r1; -; O1-NEXT: st.param.b32 [func_retval0], %r3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t32( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b16 %rs<3>; +; O1-NO-MAD-NEXT: .reg .b32 %r<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t32_param_0]; +; O1-NO-MAD-NEXT: ld.param.b16 %rs2, [t32_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s16 %r1, %rs1, %rs2; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t32_param_2]; +; O1-NO-MAD-NEXT: add.s32 %r3, %r2, %r1; +; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t32( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b16 %rs<3>; +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b16 %rs1, [t32_param_0]; +; O1-MAD-NEXT: ld.param.b16 %rs2, [t32_param_1]; +; O1-MAD-NEXT: ld.param.b32 %r1, [t32_param_2]; +; O1-MAD-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1; +; O1-MAD-NEXT: st.param.b32 [func_retval0], %r2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t32( ; O0: { @@ -971,20 +1039,32 @@ define i32 @t32(i16 %a, i16 %b, i32 %c) { } define i32 @t33(i16 %a, i16 %b, i32 %c) { -; -; O1-LABEL: t33( -; O1: { -; O1-NEXT: .reg .b16 %rs<3>; -; O1-NEXT: .reg .b32 %r<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b16 %rs1, [t33_param_0]; -; O1-NEXT: ld.param.b16 %rs2, [t33_param_1]; -; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2; -; O1-NEXT: ld.param.b32 %r2, [t33_param_2]; -; O1-NEXT: add.s32 %r3, %r2, %r1; -; O1-NEXT: st.param.b32 [func_retval0], %r3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t33( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b16 
%rs<3>; +; O1-NO-MAD-NEXT: .reg .b32 %r<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t33_param_0]; +; O1-NO-MAD-NEXT: ld.param.b16 %rs2, [t33_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s16 %r1, %rs1, %rs2; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t33_param_2]; +; O1-NO-MAD-NEXT: add.s32 %r3, %r2, %r1; +; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t33( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b16 %rs<3>; +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b16 %rs1, [t33_param_0]; +; O1-MAD-NEXT: ld.param.b16 %rs2, [t33_param_1]; +; O1-MAD-NEXT: ld.param.b32 %r1, [t33_param_2]; +; O1-MAD-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1; +; O1-MAD-NEXT: st.param.b32 [func_retval0], %r2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t33( ; O0: { @@ -1007,19 +1087,30 @@ define i32 @t33(i16 %a, i16 %b, i32 %c) { } define i32 @t34(i16 %a, i16 %b) { -; -; O1-LABEL: t34( -; O1: { -; O1-NEXT: .reg .b16 %rs<3>; -; O1-NEXT: .reg .b32 %r<3>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b16 %rs1, [t34_param_0]; -; O1-NEXT: ld.param.b16 %rs2, [t34_param_1]; -; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2; -; O1-NEXT: add.s32 %r2, %r1, 1; -; O1-NEXT: st.param.b32 [func_retval0], %r2; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t34( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b16 %rs<3>; +; O1-NO-MAD-NEXT: .reg .b32 %r<3>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t34_param_0]; +; O1-NO-MAD-NEXT: ld.param.b16 %rs2, [t34_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s16 %r1, %rs1, %rs2; +; O1-NO-MAD-NEXT: add.s32 %r2, %r1, 1; +; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t34( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b16 %rs<3>; +; O1-MAD-NEXT: .reg .b32 %r<2>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b16 %rs1, [t34_param_0]; +; O1-MAD-NEXT: ld.param.b16 %rs2, 
[t34_param_1]; +; O1-MAD-NEXT: mad.wide.s16 %r1, %rs1, %rs2, 1; +; O1-MAD-NEXT: st.param.b32 [func_retval0], %r1; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t34( ; O0: { @@ -1041,19 +1132,30 @@ define i32 @t34(i16 %a, i16 %b) { } define i32 @t35(i16 %a, i32 %c) { -; -; O1-LABEL: t35( -; O1: { -; O1-NEXT: .reg .b16 %rs<2>; -; O1-NEXT: .reg .b32 %r<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b16 %rs1, [t35_param_0]; -; O1-NEXT: ld.param.b32 %r1, [t35_param_1]; -; O1-NEXT: mul.wide.s16 %r2, %rs1, 3; -; O1-NEXT: add.s32 %r3, %r1, %r2; -; O1-NEXT: st.param.b32 [func_retval0], %r3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t35( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b16 %rs<2>; +; O1-NO-MAD-NEXT: .reg .b32 %r<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t35_param_0]; +; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t35_param_1]; +; O1-NO-MAD-NEXT: mul.wide.s16 %r2, %rs1, 3; +; O1-NO-MAD-NEXT: add.s32 %r3, %r1, %r2; +; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t35( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b16 %rs<2>; +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b16 %rs1, [t35_param_0]; +; O1-MAD-NEXT: ld.param.b32 %r1, [t35_param_1]; +; O1-MAD-NEXT: mad.wide.s16 %r2, %rs1, 3, %r1; +; O1-MAD-NEXT: st.param.b32 [func_retval0], %r2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t35( ; O0: { @@ -1075,18 +1177,28 @@ define i32 @t35(i16 %a, i32 %c) { } define i32 @t36(i16 %a, i32 %c) { -; -; O1-LABEL: t36( -; O1: { -; O1-NEXT: .reg .b16 %rs<2>; -; O1-NEXT: .reg .b32 %r<3>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b16 %rs1, [t36_param_0]; -; O1-NEXT: mul.wide.s16 %r1, %rs1, 3; -; O1-NEXT: add.s32 %r2, %r1, 5; -; O1-NEXT: st.param.b32 [func_retval0], %r2; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t36( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b16 %rs<2>; +; O1-NO-MAD-NEXT: .reg .b32 %r<3>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; 
O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t36_param_0]; +; O1-NO-MAD-NEXT: mul.wide.s16 %r1, %rs1, 3; +; O1-NO-MAD-NEXT: add.s32 %r2, %r1, 5; +; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t36( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b16 %rs<2>; +; O1-MAD-NEXT: .reg .b32 %r<2>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b16 %rs1, [t36_param_0]; +; O1-MAD-NEXT: mad.wide.s16 %r1, %rs1, 3, 5; +; O1-MAD-NEXT: st.param.b32 [func_retval0], %r1; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t36( ; O0: { @@ -1107,20 +1219,32 @@ define i32 @t36(i16 %a, i32 %c) { } define i32 @t37(i16 %a, i16 %b, i32 %c) { -; -; O1-LABEL: t37( -; O1: { -; O1-NEXT: .reg .b16 %rs<3>; -; O1-NEXT: .reg .b32 %r<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b16 %rs1, [t37_param_0]; -; O1-NEXT: ld.param.b16 %rs2, [t37_param_1]; -; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2; -; O1-NEXT: ld.param.b32 %r2, [t37_param_2]; -; O1-NEXT: add.s32 %r3, %r2, %r1; -; O1-NEXT: st.param.b32 [func_retval0], %r3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t37( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b16 %rs<3>; +; O1-NO-MAD-NEXT: .reg .b32 %r<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t37_param_0]; +; O1-NO-MAD-NEXT: ld.param.b16 %rs2, [t37_param_1]; +; O1-NO-MAD-NEXT: mul.wide.u16 %r1, %rs1, %rs2; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t37_param_2]; +; O1-NO-MAD-NEXT: add.s32 %r3, %r2, %r1; +; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t37( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b16 %rs<3>; +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b16 %rs1, [t37_param_0]; +; O1-MAD-NEXT: ld.param.b16 %rs2, [t37_param_1]; +; O1-MAD-NEXT: ld.param.b32 %r1, [t37_param_2]; +; O1-MAD-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1; +; O1-MAD-NEXT: st.param.b32 [func_retval0], %r2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t37( ; O0: { @@ 
-1143,20 +1267,32 @@ define i32 @t37(i16 %a, i16 %b, i32 %c) { } define i32 @t38(i16 %a, i16 %b, i32 %c) { -; -; O1-LABEL: t38( -; O1: { -; O1-NEXT: .reg .b16 %rs<3>; -; O1-NEXT: .reg .b32 %r<4>; -; O1-EMPTY: -; O1-NEXT: // %bb.0: -; O1-NEXT: ld.param.b16 %rs1, [t38_param_0]; -; O1-NEXT: ld.param.b16 %rs2, [t38_param_1]; -; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2; -; O1-NEXT: ld.param.b32 %r2, [t38_param_2]; -; O1-NEXT: add.s32 %r3, %r1, %r2; -; O1-NEXT: st.param.b32 [func_retval0], %r3; -; O1-NEXT: ret; +; O1-NO-MAD-LABEL: t38( +; O1-NO-MAD: { +; O1-NO-MAD-NEXT: .reg .b16 %rs<3>; +; O1-NO-MAD-NEXT: .reg .b32 %r<4>; +; O1-NO-MAD-EMPTY: +; O1-NO-MAD-NEXT: // %bb.0: +; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t38_param_0]; +; O1-NO-MAD-NEXT: ld.param.b16 %rs2, [t38_param_1]; +; O1-NO-MAD-NEXT: mul.wide.u16 %r1, %rs1, %rs2; +; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t38_param_2]; +; O1-NO-MAD-NEXT: add.s32 %r3, %r1, %r2; +; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r3; +; O1-NO-MAD-NEXT: ret; +; +; O1-MAD-LABEL: t38( +; O1-MAD: { +; O1-MAD-NEXT: .reg .b16 %rs<3>; +; O1-MAD-NEXT: .reg .b32 %r<3>; +; O1-MAD-EMPTY: +; O1-MAD-NEXT: // %bb.0: +; O1-MAD-NEXT: ld.param.b16 %rs1, [t38_param_0]; +; O1-MAD-NEXT: ld.param.b16 %rs2, [t38_param_1]; +; O1-MAD-NEXT: ld.param.b32 %r1, [t38_param_2]; +; O1-MAD-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1; +; O1-MAD-NEXT: st.param.b32 [func_retval0], %r2; +; O1-MAD-NEXT: ret; ; ; O0-LABEL: t38( ; O0: {