diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 60aa61e993b26..ff1fcc550bf2b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1816,6 +1816,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, MVT::v4i32, MVT::v1i64, MVT::v2i64}) { setOperationAction(ISD::SDIV, VT, Custom); setOperationAction(ISD::UDIV, VT, Custom); + setOperationAction(ISD::MULHS, VT, Custom); + setOperationAction(ISD::MULHU, VT, Custom); } // NEON doesn't support 64-bit vector integer muls, but SVE does. @@ -1852,10 +1854,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::v1i64, Custom); setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); setOperationAction(ISD::CTTZ, MVT::v1i64, Custom); - setOperationAction(ISD::MULHS, MVT::v1i64, Custom); - setOperationAction(ISD::MULHS, MVT::v2i64, Custom); - setOperationAction(ISD::MULHU, MVT::v1i64, Custom); - setOperationAction(ISD::MULHU, MVT::v2i64, Custom); setOperationAction(ISD::SMAX, MVT::v1i64, Custom); setOperationAction(ISD::SMAX, MVT::v2i64, Custom); setOperationAction(ISD::SMIN, MVT::v1i64, Custom); diff --git a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll index 32760caa524ec..3c817e5ddbd82 100644 --- a/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-mulh-pred.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; ; SMULH ; -define @smulh_i8( %a, %b) #0 { +define @smulh_i8( %a, %b) { ; CHECK-LABEL: smulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -19,7 +19,7 @@ define @smulh_i8( %a, %b ret %tr } -define @smulh_i16( %a, %b) #0 { +define @smulh_i16( %a, %b) { ; CHECK-LABEL: smulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -33,7 +33,7 @@ define @smulh_i16( %a, % ret %tr } -define @smulh_i32( %a, %b) #0 { +define @smulh_i32( %a, %b) { ; CHECK-LABEL: smulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -47,7 +47,7 @@ define @smulh_i32( %a, % ret %tr } -define @smulh_i64( %a, %b) #0 { +define @smulh_i64( %a, %b) { ; CHECK-LABEL: smulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -65,7 +65,7 @@ define @smulh_i64( %a, % ; UMULH ; -define @umulh_i8( %a, %b) #0 { +define @umulh_i8( %a, %b) { ; CHECK-LABEL: umulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b @@ -79,7 +79,7 @@ define @umulh_i8( %a, %b ret %tr } -define @umulh_i16( %a, %b) #0 { +define @umulh_i16( %a, %b) { ; CHECK-LABEL: umulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h @@ -93,7 +93,7 @@ define @umulh_i16( %a, % ret %tr } -define @umulh_i32( %a, %b) #0 { +define @umulh_i32( %a, %b) { ; CHECK-LABEL: umulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -107,7 +107,7 @@ define @umulh_i32( %a, % ret %tr } -define @umulh_i64( %a, %b) #0 { +define @umulh_i64( %a, %b) { ; CHECK-LABEL: umulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d @@ -121,4 +121,282 @@ define @umulh_i64( %a, % ret %tr } -attributes #0 = { "target-features"="+sve" } + +; Fixed-length 128bits + +define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <16 x i8> %a to <16 x i16> + %2 = sext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <8 x i16> %a to <8 x i32> + %2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <4 x i32> %a to <4 x i64> + %2 = sext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <2 x i64> %a to <2 x i128> + %2 = sext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <16 x i8> %a to <16 x i16> + %2 = zext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <8 x i16> %a to <8 x i32> + %2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <4 x i32> %a to <4 x i64> + %2 = zext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <2 x i64> %a to <2 x i128> + %2 = zext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + + + +; Fixed-length 64bits + +define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <8 x i8> %a to <8 x i16> + %2 = sext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <4 x i16> %a to <4 x i32> + %2 = sext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <2 x i32> %a to <2 x i64> + %2 = sext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <1 x i64> %a to <1 x i128> + %2 = sext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} + +define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <8 x i8> %a to <8 x i16> + %2 = zext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <4 x i16> %a to <4 x i32> + %2 = zext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <2 x i32> %a to <2 x i64> + %2 = zext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <1 x i64> %a to <1 x i128> + %2 = zext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} + diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll index bcf76d5b13d62..917d8e6ec22ef 100644 --- a/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve2-int-mulh.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s ; ; SMULH ; -define @smulh_i8( %a, %b) #0 { +define @smulh_i8( %a, %b) { ; CHECK-LABEL: smulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.b, z0.b, z1.b @@ -18,7 +18,7 @@ define @smulh_i8( %a, %b ret %tr } -define @smulh_i16( %a, %b) #0 { +define @smulh_i16( %a, %b) { ; CHECK-LABEL: smulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.h, z0.h, z1.h @@ -31,7 +31,7 @@ define @smulh_i16( %a, % ret %tr } -define @smulh_i32( %a, %b) #0 { +define @smulh_i32( %a, %b) { ; CHECK-LABEL: smulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.s, z0.s, z1.s @@ -44,7 +44,7 @@ define @smulh_i32( %a, % ret %tr } -define @smulh_i64( %a, %b) #0 { +define @smulh_i64( %a, %b) { ; CHECK-LABEL: smulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: smulh z0.d, z0.d, z1.d @@ -61,7 +61,7 @@ define @smulh_i64( %a, % ; UMULH ; -define @umulh_i8( %a, %b) #0 { +define @umulh_i8( %a, %b) { ; CHECK-LABEL: umulh_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.b, z0.b, z1.b @@ -74,7 +74,7 @@ define @umulh_i8( %a, %b ret %tr } -define @umulh_i16( %a, %b) #0 { +define @umulh_i16( %a, %b) { ; CHECK-LABEL: umulh_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.h, z0.h, z1.h @@ -87,7 +87,7 @@ define @umulh_i16( %a, % ret %tr } -define @umulh_i32( %a, %b) #0 { +define @umulh_i32( %a, %b) { ; CHECK-LABEL: umulh_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.s, z0.s, z1.s @@ -100,7 +100,7 @@ define @umulh_i32( %a, % ret %tr } -define @umulh_i64( %a, %b) #0 { +define @umulh_i64( %a, %b) { ; CHECK-LABEL: umulh_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: umulh z0.d, z0.d, z1.d @@ -113,4 +113,265 @@ define @umulh_i64( %a, % ret %tr } -attributes #0 = { "target-features"="+sve2" } + +; Fixed-length 128bits + +define <16 x i8> @smulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: smulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.b, z0.b, z1.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <16 x i8> %a to <16 x i16> + %2 = sext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @smulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: smulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.h, z0.h, z1.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <8 x i16> %a to <8 x i32> + %2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @smulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: smulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <4 x i32> %a to <4 x i64> + %2 = sext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @smulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: smulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: smulh z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <2 x i64> %a to <2 x i128> + %2 = sext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + +define <16 x i8> @umulh_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: umulh_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.b, z0.b, z1.b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <16 x i8> %a to <16 x i16> + %2 = zext <16 x i8> %b to <16 x i16> + %mul = mul <16 x i16> %1, %2 + %shr = lshr <16 x i16> %mul, splat(i16 8) + %tr = trunc <16 x i16> %shr to <16 x i8> + ret <16 x i8> %tr +} + +define <8 x i16> @umulh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: umulh_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.h, z0.h, z1.h +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <8 x i16> %a to <8 x i32> + %2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %1, %2 + %shr = lshr <8 x i32> %mul, splat(i32 16) + %tr = trunc <8 x i32> %shr to <8 x i16> + ret <8 x i16> %tr +} + +define <4 x i32> @umulh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: umulh_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <4 x i32> %a to <4 x i64> + %2 = zext <4 x i32> %b to <4 x i64> + %mul = mul <4 x i64> %1, %2 + %shr = lshr <4 x i64> %mul, splat(i64 32) + %tr = trunc <4 x i64> %shr to <4 x i32> + ret <4 x i32> %tr +} + +define <2 x i64> @umulh_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: umulh_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-NEXT: umulh z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <2 x i64> %a to <2 x i128> + %2 = zext <2 x i64> %b to <2 x i128> + %mul = mul <2 x i128> %1, %2 + %shr = lshr <2 x i128> %mul, splat(i128 64) + %tr = trunc <2 x i128> %shr to <2 x i64> + ret <2 x i64> %tr +} + + + +; Fixed-length 64bits + +define <8 x i8> @smulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: smulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.b, z0.b, z1.b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <8 x i8> %a to <8 x i16> + %2 = sext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @smulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: smulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.h, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <4 x i16> %a to <4 x i32> + %2 = sext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @smulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: smulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <2 x i32> %a to <2 x i64> + %2 = sext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @smulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: smulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: smulh z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = sext <1 x i64> %a to <1 x i128> + %2 = sext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +} + +define <8 x i8> @umulh_v8i8(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: umulh_v8i8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.b, z0.b, z1.b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <8 x i8> %a to <8 x i16> + %2 = zext <8 x i8> %b to <8 x i16> + %mul = mul <8 x i16> %1, %2 + %shr = lshr <8 x i16> %mul, splat(i16 8) + %tr = trunc <8 x i16> %shr to <8 x i8> + ret <8 x i8> %tr +} + +define <4 x i16> @umulh_v4i16(<4 x i16> %a, <4 x i16> %b) { +; CHECK-LABEL: umulh_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.h, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <4 x i16> %a to <4 x i32> + %2 = zext <4 x i16> %b to <4 x i32> + %mul = mul <4 x i32> %1, %2 + %shr = lshr <4 x i32> %mul, splat(i32 16) + %tr = trunc <4 x i32> %shr to <4 x i16> + ret <4 x i16> %tr +} + +define <2 x i32> @umulh_v2i32(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: umulh_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.s, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <2 x i32> %a to <2 x i64> + %2 = zext <2 x i32> %b to <2 x i64> + %mul = mul <2 x i64> %1, %2 + %shr = lshr <2 x i64> %mul, splat(i64 32) + %tr = trunc <2 x i64> %shr to <2 x i32> + ret <2 x i32> %tr +} + +define <1 x i64> @umulh_v1i64(<1 x i64> %a, <1 x i64> %b) { +; CHECK-LABEL: umulh_v1i64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 +; CHECK-NEXT: umulh z0.d, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 +; CHECK-NEXT: ret + %1 = zext <1 x i64> %a to <1 x i128> + %2 = zext <1 x i64> %b to <1 x i128> + %mul = mul <1 x i128> %1, %2 + %shr = lshr <1 x i128> %mul, splat(i128 64) + %tr = trunc <1 x i128> %shr to <1 x i64> + ret <1 x i64> %tr +}