diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index e84070f1a5468..e84a3286eaa9a 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -380,6 +380,7 @@ multiclass NopCast_pat { def Add: pf2; def And: pf2; def Sra: pf2; def Sub: pf2; def Or: pf2; def Srl: pf2; def Mul: pf2; def Xor: pf2; def Shl: pf2; +def Sext: pf1; def Zext: pf1; def Smin: pf2; def Smax: pf2; def Umin: pf2; def Umax: pf2; diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index 674d19176a88b..64bb93a5ca8f8 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -76,6 +76,12 @@ def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>; def VZxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackub $Vs)>; def VZxth: OutPatFrag<(ops node:$Vs), (V6_vunpackuh $Vs)>; +def VShuff: OutPatFrag<(ops node:$Vs, node:$S), + (V6_vshuffvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>; + +def VDeal: OutPatFrag<(ops node:$Vs, node:$S), + (V6_vdealvdd (HiVec $Vs), (LoVec $Vs), (A2_tfrsi $S))>; + class VSubi: OutPatFrag<(ops node:$Imm, node:$Vs), (VSub (VSplati (i32 $Imm)), $Vs)>; @@ -402,6 +408,64 @@ class Vneg1 class Vnot : PatFrag<(ops node:$Vs), (xor $Vs, Vneg1)>; +class ExtOp_pat + : Pat<(ResType (Op (Ext VPred:$Vs), (Ext VPred:$Vt))), + (VShuff (MI VPred:$Vs, VPred:$Vt), Shuff)>; + +class VOpAcc_pat + : Pat<(ResType (add VxPred:$Vx, (Op (Ext VsPred:$Vs), (Ext VsPred:$Vt)))), + (VShuff (MI (VDeal $Vx, Shuff), VsPred:$Vs, VsPred:$Vt), Shuff)>; + +let Predicates = [UseHVX] in { + let AddedComplexity = 200 in { + def : ExtOp_pat; + def : ExtOp_pat; + def : ExtOp_pat; + + def : ExtOp_pat; + def : ExtOp_pat; + def : ExtOp_pat; + + def : ExtOp_pat; + def : ExtOp_pat; + def : ExtOp_pat; + def : ExtOp_pat; + + // The first operand in V6_vmpybusv is unsigned. 
+ def : Pat<(VecPI16 (mul (VecPI16 (zext HVI8:$Vs)), + (VecPI16 (sext HVI8:$Vv)))), + (VShuff (V6_vmpybusv HVI8:$Vs, HVI8:$Vv), -2)>; + + // The second operand in V6_vmpyhus is unsigned. + def : Pat<(VecPI32 (mul (VecPI32 (sext HVI16:$Vs)), + (VecPI32 (zext HVI16:$Vv)))), + (VShuff (V6_vmpyhus HVI16:$Vs, HVI16:$Vv), -4)>; + + def : VOpAcc_pat; + def : VOpAcc_pat; + def : VOpAcc_pat; + + def : VOpAcc_pat; + def : VOpAcc_pat; + def : VOpAcc_pat; + def : VOpAcc_pat; + + // The second operand in V6_vmpybusv_acc is unsigned. + def : Pat<(VecPI16 (add HWI16:$Vx , (mul (VecPI16 (zext HVI8:$Vs)), + (VecPI16 (sext HVI8:$Vt))))), + (VShuff (V6_vmpybusv_acc (VDeal $Vx, -2), + HVI8:$Vs, HVI8:$Vt), -2)>; + + // The third operand in V6_vmpyhus_acc is unsigned. + def : Pat<(add HWI32:$Vx, (mul (VecPI32 (sext HVI16:$Vs)), + (VecPI32 (zext HVI16:$Vt)))), + (VShuff (V6_vmpyhus_acc (VDeal $Vx, -4), + HVI16:$Vs, HVI16:$Vt), -4)>; + } +} + let Predicates = [UseHVX] in { let AddedComplexity = 200 in { def: Pat<(Vnot HVI8:$Vs), (V6_vnot HvxVR:$Vs)>; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll index f45dce7791118..291243299c534 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/arith.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/arith.ll @@ -132,141 +132,5 @@ define <32 x i32> @xorw_128(<32 x i32> %v0, <32 x i32> %v1) #1 { ret <32 x i32> %p } -; --- add - -; CHECK-LABEL: addb_64: -; CHECK: vadd(v0.b,v1.b) -define <64 x i8> @addb_64(<64 x i8> %v0, <64 x i8> %v1) #0 { - %p = add <64 x i8> %v0, %v1 - ret <64 x i8> %p -} - -; CHECK-LABEL: addb_128: -; CHECK: vadd(v0.b,v1.b) -define <128 x i8> @addb_128(<128 x i8> %v0, <128 x i8> %v1) #1 { - %p = add <128 x i8> %v0, %v1 - ret <128 x i8> %p -} - -; CHECK-LABEL: addh_64: -; CHECK: vadd(v0.h,v1.h) -define <32 x i16> @addh_64(<32 x i16> %v0, <32 x i16> %v1) #0 { - %p = add <32 x i16> %v0, %v1 - ret <32 x i16> %p -} - -; CHECK-LABEL: addh_128: -; CHECK: vadd(v0.h,v1.h) -define <64 x i16> 
@addh_128(<64 x i16> %v0, <64 x i16> %v1) #1 { - %p = add <64 x i16> %v0, %v1 - ret <64 x i16> %p -} - -; CHECK-LABEL: addw_64: -; CHECK: vadd(v0.w,v1.w) -define <16 x i32> @addw_64(<16 x i32> %v0, <16 x i32> %v1) #0 { - %p = add <16 x i32> %v0, %v1 - ret <16 x i32> %p -} - -; CHECK-LABEL: addw_128: -; CHECK: vadd(v0.w,v1.w) -define <32 x i32> @addw_128(<32 x i32> %v0, <32 x i32> %v1) #1 { - %p = add <32 x i32> %v0, %v1 - ret <32 x i32> %p -} - -; --- sub - -; CHECK-LABEL: subb_64: -; CHECK: vsub(v0.b,v1.b) -define <64 x i8> @subb_64(<64 x i8> %v0, <64 x i8> %v1) #0 { - %p = sub <64 x i8> %v0, %v1 - ret <64 x i8> %p -} - -; CHECK-LABEL: subb_128: -; CHECK: vsub(v0.b,v1.b) -define <128 x i8> @subb_128(<128 x i8> %v0, <128 x i8> %v1) #1 { - %p = sub <128 x i8> %v0, %v1 - ret <128 x i8> %p -} - -; CHECK-LABEL: subh_64: -; CHECK: vsub(v0.h,v1.h) -define <32 x i16> @subh_64(<32 x i16> %v0, <32 x i16> %v1) #0 { - %p = sub <32 x i16> %v0, %v1 - ret <32 x i16> %p -} - -; CHECK-LABEL: subh_128: -; CHECK: vsub(v0.h,v1.h) -define <64 x i16> @subh_128(<64 x i16> %v0, <64 x i16> %v1) #1 { - %p = sub <64 x i16> %v0, %v1 - ret <64 x i16> %p -} - -; CHECK-LABEL: subw_64: -; CHECK: vsub(v0.w,v1.w) -define <16 x i32> @subw_64(<16 x i32> %v0, <16 x i32> %v1) #0 { - %p = sub <16 x i32> %v0, %v1 - ret <16 x i32> %p -} - -; CHECK-LABEL: subw_128: -; CHECK: vsub(v0.w,v1.w) -define <32 x i32> @subw_128(<32 x i32> %v0, <32 x i32> %v1) #1 { - %p = sub <32 x i32> %v0, %v1 - ret <32 x i32> %p -} - -; --- mul - -; CHECK-LABEL: mpyb_64: -; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b) -; CHECK: vshuffe(v[[H0]].b,v[[L0]].b) -define <64 x i8> @mpyb_64(<64 x i8> %v0, <64 x i8> %v1) #0 { - %p = mul <64 x i8> %v0, %v1 - ret <64 x i8> %p -} - -; CHECK-LABEL: mpyb_128: -; CHECK: v[[H0:[0-9]+]]:[[L0:[0-9]+]].h = vmpy(v0.b,v1.b) -; CHECK: vshuffe(v[[H0]].b,v[[L0]].b) -define <128 x i8> @mpyb_128(<128 x i8> %v0, <128 x i8> %v1) #1 { - %p = mul <128 x i8> %v0, %v1 - ret <128 x i8> %p -} - -; 
CHECK-LABEL: mpyh_64: -; CHECK: vmpyi(v0.h,v1.h) -define <32 x i16> @mpyh_64(<32 x i16> %v0, <32 x i16> %v1) #0 { - %p = mul <32 x i16> %v0, %v1 - ret <32 x i16> %p -} - -; CHECK-LABEL: mpyh_128: -; CHECK: vmpyi(v0.h,v1.h) -define <64 x i16> @mpyh_128(<64 x i16> %v0, <64 x i16> %v1) #1 { - %p = mul <64 x i16> %v0, %v1 - ret <64 x i16> %p -} - -; CHECK-LABEL: mpyw_64: -; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h) -; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh) -define <16 x i32> @mpyw_64(<16 x i32> %v0, <16 x i32> %v1) #0 { - %p = mul <16 x i32> %v0, %v1 - ret <16 x i32> %p -} - -; CHECK-LABEL: mpyw_128: -; CHECK: v[[V0:[0-9]+]].w = vmpyieo(v0.h,v1.h) -; CHECK: v[[V0]].w += vmpyie(v0.w,v1.uh) -define <32 x i32> @mpyw_128(<32 x i32> %v0, <32 x i32> %v1) #1 { - %p = mul <32 x i32> %v0, %v1 - ret <32 x i32> %p -} - -attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" } -attributes #1 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } +attributes #0 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" } +attributes #1 = { nounwind "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll index a9483037e14b1..dd70368979c87 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-128b.ll @@ -573,4 +573,137 @@ define <32 x i32> @test_2i(<32 x i32> %v0, <32 x i32> %v1, <32 x i32> %v2) #0 { ret <32 x i32> %t1 } -attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length128b" } +; --- Float32 + +; CHECK-LABEL: test_2j: +; CHECK: q[[Q2J0:[0-3]]] = vcmp.eq(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q2J0]],v0,v1) +define <32 x float> @test_2j(<32 x float> %v0, <32 x float> %v1) #1 { + %t0 = fcmp oeq <32 x float> %v0, %v1 + %t1 = select <32 x i1> 
%t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2k: +; CHECK: q[[Q2K0:[0-3]]] = vcmp.eq(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q2K0]],v1,v0) +define <32 x float> @test_2k(<32 x float> %v0, <32 x float> %v1) #1 { + %t0 = fcmp one <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2l: +; CHECK: v0.sf = vmin(v1.sf,v0.sf) +define <32 x float> @test_2l(<32 x float> %v0, <32 x float> %v1) #1 { + %t0 = fcmp olt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2m: +; CHECK: q[[Q2M0:[0-3]]] = vcmp.gt(v0.sf,v1.sf) +; CHECK: v0 = vmux(q[[Q2M0]],v1,v0) +define <32 x float> @test_2m(<32 x float> %v0, <32 x float> %v1) #1 { + %t0 = fcmp ole <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2n: +; CHECK: v0.sf = vmax(v0.sf,v1.sf) +define <32 x float> @test_2n(<32 x float> %v0, <32 x float> %v1) #1 { + %t0 = fcmp ogt <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2o: +; CHECK: q[[Q2O0:[0-3]]] = vcmp.gt(v1.sf,v0.sf) +; CHECK: v0 = vmux(q[[Q2O0]],v1,v0) +define <32 x float> @test_2o(<32 x float> %v0, <32 x float> %v1) #1 { + %t0 = fcmp oge <32 x float> %v0, %v1 + %t1 = select <32 x i1> %t0, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2p: +; CHECK: r[[R2P0:[0-9]*]] = ##16843009 +; CHECK: q[[Q2P1:[0-3]]] = vand(v2,r[[R2P0]]) +; CHECK: q[[Q2P1:[0-3]]] &= vcmp.eq(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q2P1]],v0,v1) +define <32 x float> @test_2p(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 { + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = and <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + 
ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2q: +; CHECK: r[[R2Q0:[0-9]*]] = ##16843009 +; CHECK: q[[Q2Q1:[0-3]]] = vand(v2,r[[R2Q0]]) +; CHECK: q[[Q2Q1:[0-3]]] |= vcmp.eq(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q2Q1]],v0,v1) +define <32 x float> @test_2q(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 { + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = or <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2r: +; CHECK: r[[R2R0:[0-9]*]] = ##16843009 +; CHECK: q[[Q2R1:[0-3]]] = vand(v2,r[[R2R0]]) +; CHECK: q[[Q2R1:[0-3]]] ^= vcmp.eq(v0.w,v1.w) +; CHECK: v0 = vmux(q[[Q2R1]],v0,v1) +define <32 x float> @test_2r(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 { + %q0 = fcmp oeq <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = xor <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2s: +; CHECK: r[[R2S0:[0-9]*]] = ##16843009 +; CHECK: q[[Q2S1:[0-3]]] = vand(v2,r[[R2S0]]) +; CHECK: q[[Q2S1:[0-3]]] &= vcmp.gt(v0.sf,v1.sf) +; CHECK: v0 = vmux(q[[Q2S1]],v0,v1) +define <32 x float> @test_2s(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 { + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = and <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: test_2t: +; CHECK: r[[R2T0:[0-9]*]] = ##16843009 +; CHECK: q[[Q2T1:[0-3]]] = vand(v2,r[[R2T0]]) +; CHECK: q[[Q2T1:[0-3]]] |= vcmp.gt(v0.sf,v1.sf) +; CHECK: v0 = vmux(q[[Q2T1]],v0,v1) +define <32 x float> @test_2t(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 { + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = or <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +; CHECK-LABEL: 
test_2u: +; CHECK: r[[R2U0:[0-9]*]] = ##16843009 +; CHECK: q[[Q2U1:[0-3]]] = vand(v2,r[[R2U0]]) +; CHECK: q[[Q2U1:[0-3]]] ^= vcmp.gt(v0.sf,v1.sf) +; CHECK: v0 = vmux(q[[Q2U1]],v0,v1) +define <32 x float> @test_2u(<32 x float> %v0, <32 x float> %v1, <32 x i32> %v2) #1 { + %q0 = fcmp ogt <32 x float> %v0, %v1 + %q1 = trunc <32 x i32> %v2 to <32 x i1> + %q2 = xor <32 x i1> %q0, %q1 + %t1 = select <32 x i1> %q2, <32 x float> %v0, <32 x float> %v1 + ret <32 x float> %t1 +} + +attributes #0 = { nounwind readnone "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length128b" } +attributes #1 = { nounwind readnone "target-cpu"="hexagonv68" "target-features"="+hvxv68,+hvx-length128b,+hvx-qfloat" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll index 7673f8b12264f..52176d6d2158c 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-compare-64b.ll @@ -574,4 +574,4 @@ define <16 x i32> @test_2i(<16 x i32> %v0, <16 x i32> %v1, <16 x i32> %v2) #0 { ret <16 x i32> %t1 } -attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-length64b" } +attributes #0 = { nounwind readnone "target-cpu"="hexagonv73" "target-features"="+hvxv73,+hvx-length64b" }