diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index ef7f870663815..80287518fa56c 100755 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -91,6 +91,10 @@ HexagonTargetLowering::initializeHVXLowering() { if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { + setOperationAction(ISD::FMINNUM, MVT::v64f16, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v64f16, Legal); + setOperationAction(ISD::FMINNUM, MVT::v32f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v32f32, Legal); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64f16, Custom); setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64f16, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f32, Custom); @@ -122,6 +126,9 @@ HexagonTargetLowering::initializeHVXLowering() { setOperationAction(ISD::LOAD, MVT::v64f32, Custom); setOperationAction(ISD::STORE, MVT::v64f32, Custom); + setOperationAction(ISD::FMINNUM, MVT::v64f32, Custom); + setOperationAction(ISD::FMAXNUM, MVT::v64f32, Custom); + setOperationAction(ISD::VSELECT, MVT::v64f32, Custom); setOperationAction(ISD::MLOAD, MVT::v32f32, Custom); setOperationAction(ISD::MSTORE, MVT::v32f32, Custom); @@ -248,6 +255,32 @@ HexagonTargetLowering::initializeHVXLowering() { } } + setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand); + 
setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand); + + setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand); + // Boolean vectors. for (MVT T : LegalW) { @@ -2258,6 +2291,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CTLZ: case ISD::CTTZ: case ISD::MUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::MULHS: case ISD::MULHU: case ISD::AND: diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 8d94a9978831e..f72f02eb9cba6 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -453,6 +453,44 @@ let Predicates = [UseHVX] in { (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; } +let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { + def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect (qnot HQ16:$Qu), HVF16:$Vs, HVF16:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; + + def: Pat<(vselect HQ32:$Qu, HVF32:$Vs, HVF32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>; + def: Pat<(vselect (qnot HQ32:$Qu), HVF32:$Vs, HVF32:$Vt), + (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>; +} + +let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in { + let AddedComplexity = 220 in { + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + 
} + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF32, HVF32>; + def: OpR_RR_pat, VecF32, HVF32>; +} + +let Predicates = [UseHVXV68, UseHVX128B, UseHVXIEEEFP] in { + let AddedComplexity = 220 in { + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + defm: MinMax_pats; + } + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF16, HVF16>; + def: OpR_RR_pat, VecF32, HVF32>; + def: OpR_RR_pat, VecF32, HVF32>; +} + let Predicates = [UseHVX] in { // For i8 vectors Vs = (a0, a1, ...), Vt = (b0, b1, ...), // V6_vmpybv Vs, Vt produces a pair of i16 vectors Hi:Lo, @@ -707,3 +745,63 @@ let Predicates = [UseHVX] in { def: AccRRR_pat; def: AccRRR_pat; } + +let Predicates = [UseHVXV68, UseHVXFloatingPoint] in { + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + def: AccRRR_pat; + + def: Pat<(VecQ16 (setone HVF16:$Vt, HVF16:$Vu)), + (V6_pred_not (V6_veqh HvxVR:$Vt, HvxVR:$Vu))>; + + def: Pat<(VecQ32 (setone HVF32:$Vt, HVF32:$Vu)), + (V6_pred_not (V6_veqw HvxVR:$Vt, HvxVR:$Vu))>; +} diff --git a/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll 
b/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll new file mode 100644 index 0000000000000..cb58004e63500 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/minmax-float.ll @@ -0,0 +1,226 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s + +; min + +define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_00: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmin(v1.hf,v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_01: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_02: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmin(v0.hf,v1.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_03: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <32 x float> 
@test_10(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_10: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmin(v1.sf,v0.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_11(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_11: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_12(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_12: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmin(v0.sf,v1.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_13: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +; max + +define <64 x half> @test_20(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_20: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmax(v1.hf,v0.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <64 x half> @test_21(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_21: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; 
CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v0 + ret <64 x half> %t1 +} + +define <64 x half> @test_22(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_22: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.hf = vmax(v0.hf,v1.hf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_23(<64 x half> %v0, <64 x half> %v1) #0 { +; CHECK-LABEL: test_23: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <32 x float> @test_30(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_30: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmax(v1.sf,v0.sf) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +define <32 x float> @test_31(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_31: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v0 + ret <32 x float> %t1 +} + +define <32 x float> @test_32(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_32: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: v0.sf = vmax(v0.sf,v1.sf) +; CHECK-NEXT: jumpr r31 +; 
CHECK-NEXT: } + %t0 = fcmp ogt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_33(<32 x float> %v0, <32 x float> %v1) #0 { +; CHECK-LABEL: test_33: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v0) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +attributes #0 = { readnone nounwind "target-cpu"="hexagonv69" } + diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll new file mode 100644 index 0000000000000..3937b2ed97f2f --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-float.ll @@ -0,0 +1,466 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-qfloat < %s | FileCheck %s +; RUN: llc -march=hexagon -mattr=+hvxv69,+hvx-length128b,+hvx-ieee-fp < %s | FileCheck %s + +; --- Half + +define <64 x half> @test_00(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_00: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oeq <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_01(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_01: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp one <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 
x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_02(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_02: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_03(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_03: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_04(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_04: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_05(<64 x half> %v0, <64 x half> %v1, <64 x half> %v2) #0 { +; CHECK-LABEL: test_05: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.hf,v0.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <64 x half> %v0, %v1 + %t1 = select <64 x i1> %t0, <64 x half> %v1, <64 x half> %v2 + ret <64 x half> %t1 +} + +define <64 x half> @test_0a(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0a: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; 
CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = and <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0b(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0b: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = or <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0c(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0c: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.eq(v0.h,v1.h) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = xor <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0d(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0d: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; 
CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = and <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0e(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0e: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = or <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + +define <64 x half> @test_0f(<64 x half> %v0, <64 x half> %v1, <64 x i16> %v2) #0 { +; CHECK-LABEL: test_0f: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.gt(v0.hf,v1.hf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <64 x half> %v0, %v1 + %q1 = trunc <64 x i16> %v2 to <64 x i1> + %q2 = xor <64 x i1> %q0, %q1 + %t1 = select <64 x i1> %q2, <64 x half> %v0, <64 x half> %v1 + ret <64 x half> %t1 +} + + +; --- Single + +define <32 x float> @test_10(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_10: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oeq <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 
x float> @test_11(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_11: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp one <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_12(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_12: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp olt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_13(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_13: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ole <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_14(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_14: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v1,v2) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp ogt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_15(<32 x float> %v0, <32 x float> %v1, <32 x float> %v2) #0 { +; CHECK-LABEL: test_15: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vcmp.gt(v1.sf,v0.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v2,v1) +; 
CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %t0 = fcmp oge <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v1, <32 x float> %v2 + ret <32 x float> %t1 +} + +define <32 x float> @test_1a(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1a: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = and <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1b(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1b: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = or <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1c(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1c: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.eq(v0.w,v1.w) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = xor <32 x i1> %q0, %q1 + %t1 = select 
<32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1d(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1d: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 &= vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = and <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1e(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1e: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 |= vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = or <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +define <32 x float> @test_1f(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #0 { +; CHECK-LABEL: test_1f: +; CHECK: // %bb.0: +; CHECK-NEXT: { +; CHECK-NEXT: r0 = ##16843009 +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 = vand(v2,r0) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: q0 ^= vcmp.gt(v0.sf,v1.sf) +; CHECK-NEXT: } +; CHECK-NEXT: { +; CHECK-NEXT: v0 = vmux(q0,v0,v1) +; CHECK-NEXT: jumpr r31 +; CHECK-NEXT: } + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = xor <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + 
+attributes #0 = { nounwind readnone "target-cpu"="hexagonv69" } ; attribute group #0: referenced by every define in this test; sets nounwind, readnone, and target-cpu=hexagonv69