diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll index bd28d13973f9c..256ff94830113 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -1,5 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64 declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5 @@ -101,11 +107,18 @@ entry: } define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: test_vaddl_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddl_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddl_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %vmovl.i.i = zext <8 x i8> %a to <8 x i16> %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> @@ -229,11 +242,18 @@ entry: } define <8 x i16> @test_vaddl_high_a8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: test_vaddl_high_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddl2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddl_high_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddl2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddl_high_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: uaddl2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> @@ -345,11 +365,18 @@ entry: } define <8 x i16> @test_vaddw_a8(<8 x i16> %a, <8 x i8> %b) { -; CHECK-LABEL: test_vaddw_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddw_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddw_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: uaddw v0.8h, v0.8h, v1.8b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %vmovl.i.i = zext <8 x i8> %b to <8 x i16> %add.i = add <8 x i16> %vmovl.i.i, %a @@ -458,11 +485,18 @@ entry: } define <8 x i16> @test_vaddw_high_a8(<8 x i16> %a, <16 x i8> %b) { -; CHECK-LABEL: test_vaddw_high_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uaddw2 v0.8h, v0.8h, v1.16b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddw_high_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: uaddw2 v0.8h, v0.8h, v1.16b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddw_high_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: uaddw2 v0.8h, v0.8h, v1.16b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> @@ -574,11 +608,18 @@ entry: } define <8 x i16> @test_vsubl_a8(<8 x i8> %a, <8 x i8> %b) { -; CHECK-LABEL: test_vsubl_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubl_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubl_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %vmovl.i.i = zext <8 x i8> %a to <8 x i16> %vmovl.i2.i = zext <8 x i8> %b to <8 x i16> @@ -702,11 +743,18 @@ entry: } define <8 x i16> @test_vsubl_high_a8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: test_vsubl_high_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: usubl2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubl_high_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubl2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubl_high_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: usubl2 v0.8h, v0.16b, v1.16b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> @@ -818,11 +866,18 @@ entry: } define <8 x i16> @test_vsubw_a8(<8 x i16> %a, <8 x i8> %b) { -; CHECK-LABEL: test_vsubw_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: usubw v0.8h, v0.8h, v1.8b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubw_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubw v0.8h, v0.8h, v1.8b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubw_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: usubw v0.8h, v0.8h, v1.8b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %vmovl.i.i = zext <8 x i8> %b to <8 x i16> %sub.i = sub <8 x i16> %a, %vmovl.i.i @@ -931,11 +986,18 @@ entry: } define <8 x i16> @test_vsubw_high_a8(<8 x i16> %a, <16 x i8> %b) { -; CHECK-LABEL: test_vsubw_high_a8: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: usubw2 v0.8h, v0.8h, v1.16b -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubw_high_a8: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: usubw2 v0.8h, v0.8h, v1.16b +; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubw_high_a8: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff +; CHECK-GI-NEXT: usubw2 v0.8h, v0.8h, v1.16b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret entry: %shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> %0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16> @@ -975,10 +1037,16 @@ entry: } define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vaddhn_s16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_s16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_s16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i = add <8 x i16> %a, %b %vaddhn1.i = lshr <8 x i16> %vaddhn.i, @@ -987,10 +1055,16 @@ entry: } define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vaddhn_s32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_s32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_s32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-GI-NEXT: ret entry: %vaddhn.i = add <4 x i32> %a, %b %vaddhn1.i = lshr <4 x i32> %vaddhn.i, @@ -999,10 +1073,16 @@ entry: } define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vaddhn_s64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_s64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_s64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-GI-NEXT: ret entry: %vaddhn.i = add <2 x i64> %a, %b %vaddhn1.i = lshr <2 x i64> %vaddhn.i, @@ -1011,10 +1091,16 @@ entry: } define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vaddhn_u16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_u16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_u16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i = add <8 x i16> %a, %b %vaddhn1.i = lshr <8 x i16> %vaddhn.i, @@ -1023,10 +1109,16 @@ entry: } define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vaddhn_u32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_u32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_u32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-GI-NEXT: ret entry: %vaddhn.i = add <4 x i32> %a, %b %vaddhn1.i = lshr <4 x i32> %vaddhn.i, @@ -1035,10 +1127,16 @@ entry: } define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vaddhn_u64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_u64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_u64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-GI-NEXT: ret entry: %vaddhn.i = add <2 x i64> %a, %b %vaddhn1.i = lshr <2 x i64> %vaddhn.i, @@ -1047,11 +1145,20 @@ entry: } define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vaddhn_high_s16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_high_s16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_high_s16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i.i = add <8 x i16> %a, %b %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, @@ -1064,11 +1171,20 @@ entry: } define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vaddhn_high_s32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_high_s32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_high_s32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i.i = add <4 x i32> %a, %b %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, @@ -1081,11 +1197,20 @@ entry: } define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vaddhn_high_s64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_high_s64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_high_s64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i.i = add <2 x i64> %a, %b %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, @@ -1098,11 +1223,20 @@ entry: } define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vaddhn_high_u16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_high_u16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_high_u16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i.i = add <8 x i16> %a, %b %vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, @@ -1115,11 +1249,20 @@ entry: } define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vaddhn_high_u32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_high_u32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_high_u32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i.i = add <4 x i32> %a, %b %vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, @@ -1132,11 +1275,20 @@ entry: } define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vaddhn_high_u64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vaddhn_high_u64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vaddhn_high_u64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vaddhn.i.i = add <2 x i64> %a, %b %vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, @@ -1209,11 +1361,19 @@ entry: } define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vraddhn_high_s16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vraddhn_high_s16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: raddhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vraddhn_high_s16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: raddhn v1.8b, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> @@ -1224,11 +1384,19 @@ entry: } define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vraddhn_high_s32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vraddhn_high_s32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: raddhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vraddhn_high_s32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: raddhn v1.4h, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> @@ -1239,11 +1407,19 @@ entry: } define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vraddhn_high_s64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vraddhn_high_s64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: raddhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vraddhn_high_s64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: raddhn v1.2s, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> @@ -1254,11 +1430,19 @@ entry: } define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vraddhn_high_u16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vraddhn_high_u16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: raddhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vraddhn_high_u16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: raddhn v1.8b, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> @@ -1269,11 +1453,19 @@ entry: } define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vraddhn_high_u32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vraddhn_high_u32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: raddhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vraddhn_high_u32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: raddhn v1.4h, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> @@ -1284,11 +1476,19 @@ entry: } define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vraddhn_high_u64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vraddhn_high_u64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: raddhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vraddhn_high_u64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: raddhn v1.2s, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> @@ -1299,10 +1499,16 @@ entry: } define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vsubhn_s16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_s16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_s16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i = sub <8 x i16> %a, %b %vsubhn1.i = lshr <8 x i16> %vsubhn.i, @@ -1311,10 +1517,16 @@ entry: } define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vsubhn_s32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_s32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_s32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-GI-NEXT: ret entry: %vsubhn.i = sub <4 x i32> %a, %b %vsubhn1.i = lshr <4 x i32> %vsubhn.i, @@ -1323,10 +1535,16 @@ entry: } define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vsubhn_s64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_s64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_s64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-GI-NEXT: ret entry: %vsubhn.i = sub <2 x i64> %a, %b %vsubhn1.i = lshr <2 x i64> %vsubhn.i, @@ -1335,10 +1553,16 @@ entry: } define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vsubhn_u16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_u16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_u16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i = sub <8 x i16> %a, %b %vsubhn1.i = lshr <8 x i16> %vsubhn.i, @@ -1347,10 +1571,16 @@ entry: } define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vsubhn_u32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_u32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_u32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16 +; CHECK-GI-NEXT: ret entry: %vsubhn.i = sub <4 x i32> %a, %b %vsubhn1.i = lshr <4 x i32> %vsubhn.i, @@ -1359,10 +1589,16 @@ entry: } define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vsubhn_u64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_u64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_u64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32 +; CHECK-GI-NEXT: ret entry: %vsubhn.i = sub <2 x i64> %a, %b %vsubhn1.i = lshr <2 x i64> %vsubhn.i, @@ -1371,11 +1607,20 @@ entry: } define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vsubhn_high_s16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_high_s16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_high_s16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i.i = sub <8 x i16> %a, %b %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, @@ -1388,11 +1633,20 @@ entry: } define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vsubhn_high_s32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_high_s32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_high_s32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i.i = sub <4 x i32> %a, %b %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, @@ -1405,11 +1659,20 @@ entry: } define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vsubhn_high_s64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_high_s64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_high_s64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i.i = sub <2 x i64> %a, %b %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, @@ -1422,11 +1685,20 @@ entry: } define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vsubhn_high_u16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_high_u16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_high_u16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i.i = sub <8 x i16> %a, %b %vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, @@ -1439,11 +1711,20 @@ entry: } define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vsubhn_high_u32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_high_u32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_high_u32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i.i = sub <4 x i32> %a, %b %vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, @@ -1456,11 +1737,20 @@ entry: } define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vsubhn_high_u64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vsubhn_high_u64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vsubhn_high_u64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vsubhn.i.i = sub <2 x i64> %a, %b %vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, @@ -1533,11 +1823,19 @@ entry: } define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vrsubhn_high_s16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vrsubhn_high_s16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vrsubhn_high_s16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: rsubhn v1.8b, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> @@ -1548,11 +1846,19 @@ entry: } define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vrsubhn_high_s32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vrsubhn_high_s32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vrsubhn_high_s32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: rsubhn v1.4h, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> @@ -1563,11 +1869,19 @@ entry: } define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vrsubhn_high_s64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vrsubhn_high_s64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vrsubhn_high_s64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: rsubhn v1.2s, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> @@ -1578,11 +1892,19 @@ entry: } define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: test_vrsubhn_high_u16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vrsubhn_high_u16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vrsubhn_high_u16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: rsubhn v1.8b, v1.8h, v2.8h +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) %0 = bitcast <8 x i8> %r to <1 x i64> @@ -1593,11 +1915,19 @@ entry: } define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: test_vrsubhn_high_u32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vrsubhn_high_u32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vrsubhn_high_u32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: rsubhn v1.4h, v1.4s, v2.4s +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) %0 = bitcast <4 x i16> %r to <1 x i64> @@ -1608,11 +1938,19 @@ entry: } define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: test_vrsubhn_high_u64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_vrsubhn_high_u64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_vrsubhn_high_u64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: rsubhn v1.2s, v1.2d, v2.2d +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret entry: %vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) %0 = bitcast <2 x i32> %r to <1 x i64> @@ -2535,21 +2873,40 @@ entry: } define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) { -; CHECK-LABEL: cmplx_mul_combined_re_im: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #16 -; CHECK-NEXT: movi v1.2d, #0xffff0000ffff0000 -; CHECK-NEXT: rev32 v4.8h, v0.8h -; CHECK-NEXT: dup v2.8h, w8 -; CHECK-NEXT: sqneg v3.8h, v2.8h -; CHECK-NEXT: bsl v1.16b, v2.16b, v3.16b -; CHECK-NEXT: fmov d3, x0 -; CHECK-NEXT: sqdmull v2.4s, v4.4h, v1.4h -; CHECK-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h -; CHECK-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0] -; CHECK-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0] -; CHECK-NEXT: uzp2 v0.8h, v2.8h, v1.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: cmplx_mul_combined_re_im: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: lsr x8, x0, #16 +; CHECK-SD-NEXT: movi v1.2d, #0xffff0000ffff0000 +; CHECK-SD-NEXT: rev32 v4.8h, v0.8h +; CHECK-SD-NEXT: dup v2.8h, w8 +; CHECK-SD-NEXT: sqneg v3.8h, v2.8h +; CHECK-SD-NEXT: bsl v1.16b, v2.16b, v3.16b +; CHECK-SD-NEXT: fmov d3, x0 +; CHECK-SD-NEXT: sqdmull v2.4s, v4.4h, v1.4h +; CHECK-SD-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h +; CHECK-SD-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0] +; CHECK-SD-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0] +; CHECK-SD-NEXT: uzp2 v0.8h, v2.8h, v1.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cmplx_mul_combined_re_im: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: lsr x9, x0, #16 +; CHECK-GI-NEXT: adrp x8, .LCPI196_0 +; CHECK-GI-NEXT: rev32 v4.8h, v0.8h +; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI196_0] +; CHECK-GI-NEXT: fmov d1, x9 +; CHECK-GI-NEXT: dup v2.8h, v1.h[0] +; CHECK-GI-NEXT: sqneg v1.8h, v2.8h +; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: fmov d3, x0 +; CHECK-GI-NEXT: sqdmull v2.4s, v2.4h, v3.h[0] +; CHECK-GI-NEXT: sqdmull v5.4s, v4.4h, v1.4h +; CHECK-GI-NEXT: sqdmlal v5.4s, v0.4h, v3.h[0] +; CHECK-GI-NEXT: sqdmlal2 v2.4s, v4.8h, v1.8h +; CHECK-GI-NEXT: uzp2 v0.8h, v5.8h, v2.8h +; CHECK-GI-NEXT: ret entry: %scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16 %shuffle.i = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll index cc9732b83e10c..6c7ddd916abdf 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s +; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=arm64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>) declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>) @@ -197,11 +198,20 @@ define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { } define <2 x i32> @test_sabd_v2i32_const() { -; CHECK-LABEL: test_sabd_v2i32_const: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_sabd_v2i32_const: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI19_0 +; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI19_0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_sabd_v2i32_const: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI19_1 +; CHECK-GI-NEXT: adrp x9, .LCPI19_0 +; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI19_1] +; CHECK-GI-NEXT: ldr d1, [x9, :lo12:.LCPI19_0] +; CHECK-GI-NEXT: sabd v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32( <2 x i32> , <2 x i32> ) @@ -293,15 +303,26 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) { } define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) { -; CHECK-LABEL: test_uabd_knownbits_vec8i16: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.8h, #15 -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h -; CHECK-NEXT: rev64 v0.8h, v0.8h -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_uabd_knownbits_vec8i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v2.8h, #15 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h +; CHECK-SD-NEXT: rev64 v0.8h, v0.8h +; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_uabd_knownbits_vec8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.8h, #15 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: rev64 v0.8h, v0.8h +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: ret %and1 = and <8 x i16> %lhs, %and2 = and <8 x i16> %rhs, %uabd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %and1, <8 x i16> %and2) @@ -311,11 +332,22 @@ define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) { } define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: knownbits_uabd_mask_and_shuffle_lshr: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ushr v0.4s, v0.4s, #17 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: knownbits_uabd_mask_and_shuffle_lshr: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #17 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: knownbits_uabd_mask_and_shuffle_lshr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.2d, #0x00ffff0000ffff +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: rev64 v0.4s, v0.4s +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17 +; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = and <4 x i32> %a1, %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2) @@ -325,10 +357,19 @@ define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> } define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: knownbits_mask_and_shuffle_lshr: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: knownbits_mask_and_shuffle_lshr: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: knownbits_mask_and_shuffle_lshr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.4s, #127, msl #8 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17 +; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = and <4 x i32> %a1, %3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2) @@ -338,20 +379,36 @@ define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) } define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) { -; CHECK-LABEL: test_sabd_knownbits_vec4i32: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI31_0 -; CHECK-NEXT: adrp x9, .LCPI31_1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI31_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI31_1] -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff -; CHECK-NEXT: mov v0.s[1], v0.s[0] -; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_sabd_knownbits_vec4i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI31_0 +; CHECK-SD-NEXT: adrp x9, .LCPI31_1 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI31_0] +; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI31_1] +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff +; CHECK-SD-NEXT: mov v0.s[1], v0.s[0] +; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_sabd_knownbits_vec4i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI31_2 +; CHECK-GI-NEXT: adrp x9, .LCPI31_1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_2] +; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI31_1] +; CHECK-GI-NEXT: adrp x8, .LCPI31_0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_0] +; CHECK-GI-NEXT: movi v3.2d, #0x0000ff000000ff +; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-GI-NEXT: ret %and1 = and <4 x i32> %lhs, %and2 = and <4 x i32> %rhs, %abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %and1, <4 x i32> %and2) @@ -361,15 +418,27 @@ define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) { } define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: knownbits_sabd_and_mask: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI32_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI32_0] -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s -; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: knownbits_sabd_and_mask: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI32_0 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI32_0] +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: knownbits_sabd_and_mask: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI32_1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_1] +; CHECK-GI-NEXT: adrp x8, .LCPI32_0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_0] +; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = and <4 x i32> %a1, %3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %1, <4 x i32> %2) @@ -378,10 +447,25 @@ define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) { } define <4 x i32> @knownbits_sabd_and_or_mask(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: knownbits_sabd_and_or_mask: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: knownbits_sabd_and_or_mask: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: knownbits_sabd_and_or_mask: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI33_1 +; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1] +; CHECK-GI-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v3.16b +; CHECK-GI-NEXT: orr v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = or <4 x i32> %1, %3 = and <4 x i32> %a1, @@ -392,18 +476,33 @@ define <4 x i32> @knownbits_sabd_and_or_mask(<4 x i32> %a0, <4 x i32> %a1) { } define <4 x i32> @knownbits_sabd_and_xor_mask(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: knownbits_sabd_and_xor_mask: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI34_0 -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b -; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s -; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: knownbits_sabd_and_xor_mask: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI34_0 +; CHECK-SD-NEXT: movi v3.2d, #0x00ffff0000ffff +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: eor v0.16b, v0.16b, v3.16b +; CHECK-SD-NEXT: eor v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: knownbits_sabd_and_xor_mask: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI34_1 +; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_1] +; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: eor v0.16b, v0.16b, v3.16b +; CHECK-GI-NEXT: eor v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = xor <4 x i32> %1, %3 = and <4 x i32> %a1, @@ -414,10 +513,24 @@ define <4 x i32> @knownbits_sabd_and_xor_mask(<4 x i32> %a0, <4 x i32> %a1) { } define <4 x i32> @knownbits_sabd_and_shl_mask(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: knownbits_sabd_and_shl_mask: -; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: knownbits_sabd_and_shl_mask: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: knownbits_sabd_and_shl_mask: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI35_1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_1] +; CHECK-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #17 +; CHECK-GI-NEXT: shl v1.4s, v1.4s, #17 +; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = shl <4 x i32> %1, %3 = and <4 x i32> %a1, @@ -428,18 +541,32 @@ define <4 x i32> @knownbits_sabd_and_shl_mask(<4 x i32> %a0, <4 x i32> %a1) { } define <4 x i32> @knownbits_sabd_and_mul_mask(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: knownbits_sabd_and_mul_mask: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI36_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] -; CHECK-NEXT: and v3.16b, v0.16b, v2.16b -; CHECK-NEXT: and v2.16b, v1.16b, v2.16b -; CHECK-NEXT: mul v0.4s, v0.4s, v3.4s -; CHECK-NEXT: mul v1.4s, v1.4s, v2.4s -; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov v0.s[1], v0.s[0] -; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: knownbits_sabd_and_mul_mask: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI36_0 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] +; CHECK-SD-NEXT: and v3.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v2.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: mul v0.4s, v0.4s, v3.4s +; CHECK-SD-NEXT: mul v1.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: mov v0.s[1], v0.s[0] +; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: knownbits_sabd_and_mul_mask: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI36_1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_1] +; CHECK-GI-NEXT: adrp x8, .LCPI36_0 +; CHECK-GI-NEXT: and v3.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v2.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: mul v0.4s, v0.4s, v3.4s +; CHECK-GI-NEXT: mul v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] +; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %1 = and <4 x i32> %a0, %2 = mul <4 x i32> %a0, %1 %3 = and <4 x i32> %a1, diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll index f82d1ed87fba7..df4889b6f09de 100644 --- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll +++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-LE +; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes CHECK,CHECK-LE ; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE +; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64 declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) @@ -12,10 +16,10 @@ declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>) declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %s1, <2 x i32> %s2) define <4 x i32> @test_smull_high_s16_base(<8 x i16> %a, <8 x i16> %b) #0 { -; CHECK-LE-LABEL: test_smull_high_s16_base: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_smull_high_s16_base: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_smull_high_s16_base: ; CHECK-BE: // %bb.0: // %entry @@ -35,10 +39,10 @@ entry: } define <4 x i32> @test_smull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 { -; CHECK-LE-LABEL: test_smull_high_s16_bitcasta1: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_smull_high_s16_bitcasta1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_smull_high_s16_bitcasta1: ; CHECK-BE: // %bb.0: // %entry @@ -59,10 +63,10 @@ entry: } define <4 x i32> @test_smull_high_s16_bitcastb1(<8 x i16> %a, <16 x i8> %bb) #0 { -; CHECK-LE-LABEL: test_smull_high_s16_bitcastb1: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_smull_high_s16_bitcastb1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_smull_high_s16_bitcastb1: ; CHECK-BE: // %bb.0: // %entry @@ -83,10 +87,10 @@ entry: } define <4 x i32> @test_smull_high_s16_bitcasta2(<2 x i64> %a, <8 x i16> %b) #0 { -; CHECK-LE-LABEL: test_smull_high_s16_bitcasta2: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_smull_high_s16_bitcasta2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_smull_high_s16_bitcasta2: ; CHECK-BE: // %bb.0: // %entry @@ -109,10 +113,10 @@ entry: } define <4 x i32> @test_smull_high_s16_bitcastb2(<8 x i16> %a, <16 x i8> %b) #0 { -; CHECK-LE-LABEL: test_smull_high_s16_bitcastb2: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_smull_high_s16_bitcastb2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_smull_high_s16_bitcastb2: ; CHECK-BE: // %bb.0: // %entry @@ -157,6 +161,13 @@ define <4 x i32> @test_smull_high_s16_bitcasta1_wrongindex(<2 x i64> %aa, <8 x i ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_bitcasta1_wrongindex: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #4 +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %a = bitcast <2 x i64> %aa to <8 x i16> %s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -186,6 +197,13 @@ define <4 x i32> @test_smull_high_s16_bitcastb1_wrongindex(<8 x i16> %a, <16 x i ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_bitcastb1_wrongindex: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ext v1.16b, v1.16b, v0.16b, #6 +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %b = bitcast <16 x i8> %bb to <8 x i16> %s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -215,6 +233,13 @@ define <4 x i32> @test_smull_high_s16_bitcasta2_wrongindex(<4 x i32> %a, <8 x i1 ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_bitcasta2_wrongindex: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #4 +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %s1a = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %s1 = bitcast <2 x i32> %s1a to <4 x i16> @@ -244,6 +269,13 @@ define <4 x i32> @test_smull_high_s16_bitcastb2_wrongindex(<8 x i16> %a, <16 x i ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_bitcastb2_wrongindex: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ext v1.16b, v1.16b, v0.16b, #4 +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %s2a = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> @@ -269,6 +301,12 @@ define <4 x i32> @test_smull_high_s16_splata1(<2 x i64> %aa, <8 x i16> %b) #0 { ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_splata1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: smull v0.4s, v1.4h, v0.h[3] +; CHECK-GI-NEXT: ret entry: %a = bitcast <2 x i64> %aa to <8 x i16> %s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -293,6 +331,12 @@ define <4 x i32> @test_smull_high_s16_splatb1(<8 x i16> %a, <16 x i8> %bb) #0 { ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_splatb1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.h[3] +; CHECK-GI-NEXT: ret entry: %b = bitcast <16 x i8> %bb to <8 x i16> %s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -322,6 +366,13 @@ define <4 x i32> @test_smull_high_s16_splata2(<4 x i32> %a, <8 x i16> %b) #0 { ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_splata2: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: dup v0.2s, v0.s[3] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %s1a = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> %s1 = bitcast <2 x i32> %s1a to <4 x i16> @@ -351,6 +402,13 @@ define <4 x i32> @test_smull_high_s16_splatb2(<8 x i16> %a, <16 x i8> %b) #0 { ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_smull_high_s16_splatb2: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: dup v1.8b, v1.b[3] +; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-GI-NEXT: ret entry: %s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> %s2a = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> @@ -362,10 +420,10 @@ entry: define <4 x i32> @test_umull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 { -; CHECK-LE-LABEL: test_umull_high_s16_bitcasta1: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: umull2 v0.4s, v0.8h, v1.8h -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_umull_high_s16_bitcasta1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.8h +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_umull_high_s16_bitcasta1: ; CHECK-BE: // %bb.0: // %entry @@ -386,10 +444,10 @@ entry: } define <8 x i16> @test_vabdl_high_u82(<16 x i8> %a, <8 x i16> %bb) { -; CHECK-LE-LABEL: test_vabdl_high_u82: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: uabdl2 v0.8h, v0.16b, v1.16b -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_vabdl_high_u82: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: uabdl2 v0.8h, v0.16b, v1.16b +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_vabdl_high_u82: ; CHECK-BE: // %bb.0: // %entry @@ -411,10 +469,10 @@ entry: } define <8 x i16> @test_vabdl_high_s82(<16 x i8> %a, <8 x i16> %bb) { -; CHECK-LE-LABEL: test_vabdl_high_s82: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: sabdl2 v0.8h, v0.16b, v1.16b -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_vabdl_high_s82: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sabdl2 v0.8h, v0.16b, v1.16b +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_vabdl_high_s82: ; CHECK-BE: // %bb.0: // %entry @@ -436,10 +494,10 @@ entry: } define <4 x i32> @test_vqdmlal_high_s16_bitcast(<4 x i32> %a, <8 x i16> %b, <16 x i8> %cc) { -; CHECK-LE-LABEL: test_vqdmlal_high_s16_bitcast: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_vqdmlal_high_s16_bitcast: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_vqdmlal_high_s16_bitcast: ; CHECK-BE: // %bb.0: // %entry @@ -463,12 +521,12 @@ entry: } define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) { -; CHECK-LE-LABEL: test_pmull_high_p8_128: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: fmov d0, x3 -; CHECK-LE-NEXT: fmov d1, x1 -; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_pmull_high_p8_128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov d0, x3 +; CHECK-NEXT: fmov d1, x1 +; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_pmull_high_p8_128: ; CHECK-BE: // %bb.0: // %entry @@ -490,10 +548,10 @@ entry: } define <8 x i16> @test_pmull_high_p8_64(<2 x i64> %aa, <2 x i64> %bb) { -; CHECK-LE-LABEL: test_pmull_high_p8_64: -; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-LE-NEXT: ret +; CHECK-LABEL: test_pmull_high_p8_64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_pmull_high_p8_64: ; CHECK-BE: // %bb.0: // %entry @@ -532,6 +590,14 @@ define <8 x i16> @foov8i16(<16 x i8> %a1, <2 x i64> %b1) { ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: foov8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #5 +; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #5 +; CHECK-GI-NEXT: fmov x8, d1 +; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: ret %a0 = bitcast <16 x i8> %a1 to <4 x i32> %b0 = bitcast <2 x i64> %b1 to <4 x i32> %vshrn_low_shift = lshr <4 x i32> %a0, @@ -558,6 +624,12 @@ define <2 x i64> @hadd32_zext_asr(<16 x i8> %src1a) { ; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #1 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: hadd32_zext_asr: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1 +; CHECK-GI-NEXT: ret %src1 = bitcast <16 x i8> %src1a to <4 x i32> %s1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> %zextsrc1 = zext <2 x i32> %s1 to <2 x i64> @@ -580,6 +652,12 @@ define <2 x i64> @test_umull_high_s16_splata1(<2 x i64> %aa, <4 x i32> %b) #0 { ; CHECK-BE-NEXT: umull2 v0.2d, v1.4s, v0.s[1] ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: ret +; +; CHECK-GI-LABEL: test_umull_high_s16_splata1: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.s[1] +; CHECK-GI-NEXT: ret entry: %a = bitcast <2 x i64> %aa to <4 x i32> %s1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-saba.ll b/llvm/test/CodeGen/AArch64/neon-saba.ll index 78ccc89399292..19967bd1a69ec 100644 --- a/llvm/test/CodeGen/AArch64/neon-saba.ll +++ b/llvm/test/CodeGen/AArch64/neon-saba.ll @@ -1,13 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; SABA from ADD(ABS(SUB NSW)) define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { -; CHECK-LABEL: saba_abs_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: saba_abs_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saba v0.4s, v1.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: saba_abs_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s +; CHECK-GI-NEXT: abs v1.4s, v1.4s +; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: ret %sub = sub nsw <4 x i32> %b, %c %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true) %add = add <4 x i32> %a, %abs @@ -15,10 +23,17 @@ define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 { } define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { -; CHECK-LABEL: saba_abs_2s: -; CHECK: // %bb.0: -; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: saba_abs_2s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saba v0.2s, v1.2s, v2.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: saba_abs_2s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub v1.2s, v1.2s, v2.2s +; CHECK-GI-NEXT: abs v1.2s, v1.2s +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret %sub = sub nsw <2 x i32> %b, %c %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true) %add = add <2 x i32> %a, %abs @@ -26,10 +41,17 @@ define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 { } define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { -; CHECK-LABEL: saba_abs_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: saba_abs_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saba v0.8h, v1.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: saba_abs_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h +; CHECK-GI-NEXT: abs v1.8h, v1.8h +; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: ret %sub = sub nsw <8 x i16> %b, %c %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true) %add = add <8 x i16> %a, %abs @@ -37,10 +59,17 @@ define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 { } define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { -; CHECK-LABEL: saba_abs_4h: -; CHECK: // %bb.0: -; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: saba_abs_4h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saba v0.4h, v1.4h, v2.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: saba_abs_4h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub v1.4h, v1.4h, v2.4h +; CHECK-GI-NEXT: abs v1.4h, v1.4h +; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: ret %sub = sub nsw <4 x i16> %b, %c %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true) %add = add <4 x i16> %a, %abs @@ -48,10 +77,17 @@ define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 { } define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { -; CHECK-LABEL: saba_abs_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: saba_abs_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saba v0.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: saba_abs_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: abs v1.16b, v1.16b +; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret %sub = sub nsw <16 x i8> %b, %c %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true) %add = add <16 x i8> %a, %abs @@ -59,10 +95,17 @@ define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 { } define <8 x i8> @saba_abs_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 { -; CHECK-LABEL: saba_abs_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: saba_abs_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: saba v0.8b, v1.8b, v2.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: saba_abs_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sub v1.8b, v1.8b, v2.8b +; CHECK-GI-NEXT: abs v1.8b, v1.8b +; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %sub = sub nsw <8 x i8> %b, %c %abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %sub, i1 true) %add = add <8 x i8> %a, %abs