diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll index 94f86e7c88aeb..6e01ebc95a1cb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll @@ -1,107 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s -define <8 x i8> @vuzpi8(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpi8: -;CHECK: uzp1.8b -;CHECK: uzp2.8b -;CHECK-NEXT: add.8b - %tmp1 = load <8 x i8>, ptr %A - %tmp2 = load <8 x i8>, ptr %B - %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp5 = add <8 x i8> %tmp3, %tmp4 - ret <8 x i8> %tmp5 +define <8 x i8> @vuzpi8(<8 x i8> %A, <8 x i8> %B) nounwind { +; CHECK-LABEL: vuzpi8: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.8b v2, v0, v1 +; CHECK-NEXT: uzp2.8b v0, v0, v1 +; CHECK-NEXT: add.8b v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> + %tmp4 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 } -define <4 x i16> @vuzpi16(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpi16: -;CHECK: uzp1.4h -;CHECK: uzp2.4h -;CHECK-NEXT: add.4h - %tmp1 = load <4 x i16>, ptr %A - %tmp2 = load <4 x i16>, ptr %B - %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> - %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> - %tmp5 = add <4 x i16> %tmp3, %tmp4 - ret <4 x i16> %tmp5 +define <4 x i16> @vuzpi16(<4 x i16> %A, <4 x i16> %B) nounwind { +; CHECK-LABEL: vuzpi16: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.4h v2, v0, v1 +; CHECK-NEXT: uzp2.4h v0, v0, v1 +; CHECK-NEXT: add.4h v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <4 x i16> %A, <4 x i16> %B, <4 x i32> + %tmp4 = shufflevector <4 x i16> %A, <4 x i16> %B, <4 x i32> + %tmp5 = add <4 x i16> %tmp3, %tmp4 + ret <4 x i16> %tmp5 } -define <16 x i8> @vuzpQi8(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpQi8: -;CHECK: uzp1.16b -;CHECK: uzp2.16b -;CHECK-NEXT: add.16b - %tmp1 = load <16 x i8>, ptr %A - %tmp2 = load <16 x i8>, ptr %B - %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> - %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> - %tmp5 = add <16 x i8> %tmp3, %tmp4 - ret <16 x i8> %tmp5 +define <16 x i8> @vuzpQi8(<16 x i8> %A, <16 x i8> %B) nounwind { +; CHECK-LABEL: vuzpQi8: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.16b v2, v0, v1 +; CHECK-NEXT: uzp2.16b v0, v0, v1 +; CHECK-NEXT: add.16b v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> + %tmp4 = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> + %tmp5 = add <16 x i8> %tmp3, %tmp4 + ret <16 x i8> %tmp5 } -define <8 x i16> @vuzpQi16(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpQi16: -;CHECK: uzp1.8h -;CHECK: uzp2.8h -;CHECK-NEXT: add.8h - %tmp1 = load <8 x i16>, ptr %A - %tmp2 = load <8 x i16>, ptr %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> - %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> - %tmp5 = add <8 x i16> %tmp3, %tmp4 - ret <8 x i16> %tmp5 +define <8 x i16> @vuzpQi16(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vuzpQi16: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } -define <4 x i32> @vuzpQi32(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpQi32: -;CHECK: uzp1.4s -;CHECK: uzp2.4s -;CHECK-NEXT: add.4s - %tmp1 = load <4 x i32>, ptr %A - %tmp2 = load <4 x i32>, ptr %B - %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> - %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> - %tmp5 = add <4 x i32> %tmp3, %tmp4 - ret <4 x i32> %tmp5 +define <4 x i32> @vuzpQi32(<4 x i32> %A, <4 x i32> %B) nounwind { +; CHECK-LABEL: vuzpQi32: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.4s v2, v0, v1 +; CHECK-NEXT: uzp2.4s v0, v0, v1 +; CHECK-NEXT: add.4s v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %tmp4 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> + %tmp5 = add <4 x i32> %tmp3, %tmp4 + ret <4 x i32> %tmp5 } -define <4 x float> @vuzpQf(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpQf: -;CHECK: uzp1.4s -;CHECK: uzp2.4s -;CHECK-NEXT: fadd.4s - %tmp1 = load <4 x float>, ptr %A - %tmp2 = load <4 x float>, ptr %B - %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> - %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> - %tmp5 = fadd <4 x float> %tmp3, %tmp4 - ret <4 x float> %tmp5 +define <4 x float> @vuzpQf(<4 x float> %A, <4 x float> %B) nounwind { +; CHECK-LABEL: vuzpQf: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.4s v2, v0, v1 +; CHECK-NEXT: uzp2.4s v0, v0, v1 +; CHECK-NEXT: fadd.4s v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> + %tmp4 = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> + %tmp5 = fadd <4 x float> %tmp3, %tmp4 + ret <4 x float> %tmp5 } ; Undef shuffle indices should not prevent matching to VUZP: -define <8 x i8> @vuzpi8_undef(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpi8_undef: -;CHECK: uzp1.8b -;CHECK: uzp2.8b -;CHECK-NEXT: add.8b - %tmp1 = load <8 x i8>, ptr %A - %tmp2 = load <8 x i8>, ptr %B - %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> - %tmp5 = add <8 x i8> %tmp3, %tmp4 - ret <8 x i8> %tmp5 +define <8 x i8> @vuzpi8_undef(<8 x i8> %A, <8 x i8> %B) nounwind { +; CHECK-LABEL: vuzpi8_undef: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.8b v2, v0, v1 +; CHECK-NEXT: uzp2.8b v0, v0, v1 +; CHECK-NEXT: add.8b v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> + %tmp4 = shufflevector <8 x i8> %A, <8 x i8> %B, <8 x i32> + %tmp5 = add <8 x i8> %tmp3, %tmp4 + ret <8 x i8> %tmp5 } -define <8 x i16> @vuzpQi16_undef(ptr %A, ptr %B) nounwind { -;CHECK-LABEL: vuzpQi16_undef: -;CHECK: uzp1.8h -;CHECK: uzp2.8h -;CHECK-NEXT: add.8h - %tmp1 = load <8 x i16>, ptr %A - %tmp2 = load <8 x i16>, ptr %B - %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> - %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> - %tmp5 = add <8 x i16> %tmp3, %tmp4 - ret <8 x i16> %tmp5 +define <8 x i16> @vuzpQi16_undef1(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vuzpQi16_undef1: +; CHECK: // %bb.0: +; CHECK-NEXT: uzp1.8h v2, v0, v1 +; CHECK-NEXT: uzp2.8h v0, v0, v1 +; CHECK-NEXT: add.8h v0, v2, v0 +; CHECK-NEXT: ret + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <8 x i16> @vuzpQi16_undef0(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vuzpQi16_undef0: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: uzp2.8h v3, v0, v1 +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: ret + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <8 x i16> @vuzpQi16_undef01(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vuzpQi16_undef01: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: uzp2.8h v3, v0, v1 +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: ret + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 +} + +define <8 x i16> @vuzpQi16_undef012(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vuzpQi16_undef012: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI10_0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: uzp2.8h v3, v0, v1 +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: add.8h v0, v0, v3 +; CHECK-NEXT: ret + %tmp3 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp4 = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + %tmp5 = add <8 x i16> %tmp3, %tmp4 + ret <8 x i16> %tmp5 } diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index c707265c06c5c..349751dda461f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -139,6 +139,90 @@ define <16 x i8> @vzipQi8_undef(ptr %A, ptr %B) nounwind { ret <16 x i8> %tmp5 } +define <8 x i16> @vzip1_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip1_undef_01: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + +define <8 x i16> @vzip1_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip1_undef_0: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + +define <8 x i16> @vzip1_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip1_undef_1: +; CHECK: // %bb.0: +; CHECK-NEXT: zip1.8h v0, v0, v1 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + +define <8 x i16> @vzip1_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip1_undef_012: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI11_0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + +define <8 x i16> @vzip2_undef_01(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip2_undef_01: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.8h v0, v0, v1 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + +define <8 x i16> @vzip2_undef_0(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip2_undef_0: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.8h v0, v0, v1 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + +define <8 x i16> @vzip2_undef_1(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip2_undef_1: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.8h v0, v0, v1 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + +define <8 x i16> @vzip2_undef_012(<8 x i16> %A, <8 x i16> %B) nounwind { +; CHECK-LABEL: vzip2_undef_012: +; CHECK: // %bb.0: +; CHECK-NEXT: zip2.8h v0, v0, v1 +; CHECK-NEXT: ret + %s = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> + ret <8 x i16> %s +} + define <16 x i8> @combine_v16i8(<8 x i8> %0, <8 x i8> %1) { ; CHECK-LABEL: combine_v16i8: ; CHECK: // %bb.0: @@ -251,9 +335,9 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { ; CHECK-LABEL: combine_v8i16_8first: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2 -; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: fmov d2, d0 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3 ; CHECK-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> @@ -266,9 +350,9 @@ define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { ; CHECK-LABEL: combine_v8i16_8firstundef: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2 -; CHECK-NEXT: adrp x8, .LCPI18_0 +; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: fmov d2, d0 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3 ; CHECK-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32>