From 7d4ee32662e42093024fa1c7c1103f7484d080c1 Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Thu, 8 Sep 2022 14:01:11 +0100
Subject: [PATCH] [AArch64] Add tests for shuffle (tbl2, tbl2) -> tbl4 fold.

Add extra tests where shuffle (tbl2, tbl2) can be folded to tbl4.

Regenerate check lines automatically as well.
---
 llvm/test/CodeGen/AArch64/arm64-tbl.ll | 237 +++++++++++++++++++++----
 1 file changed, 205 insertions(+), 32 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
index d1b54b8a6264d..e84d4fa85511a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -1,61 +1,200 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s
 
 define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
-; CHECK: tbl1_8b
-; CHECK: tbl.8b
+; CHECK-LABEL: tbl1_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.8b v0, { v0 }, v1
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
-; CHECK: tbl1_16b
-; CHECK: tbl.16b
+; CHECK-LABEL: tbl1_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbl.16b v0, { v0 }, v1
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
-; CHECK: tbl2_8b
-; CHECK: tbl.8b
+; CHECK-LABEL: tbl2_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    tbl.8b v0, { v0, v1 }, v2
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
-; CHECK: tbl2_16b
-; CHECK: tbl.16b
+; CHECK-LABEL: tbl2_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK: tbl3_8b
-; CHECK: tbl.8b
+; CHECK-LABEL: tbl3_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    tbl.8b v0, { v0, v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK: tbl3_16b
-; CHECK: tbl.16b
+; CHECK-LABEL: tbl3_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK: tbl4_8b
-; CHECK: tbl.8b
+; CHECK-LABEL: tbl4_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    tbl.8b v0, { v0, v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK: tbl4_16b
-; CHECK: tbl.16b
+; CHECK-LABEL: tbl4_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT:    tbl.16b v0, { v0, v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
   ret <16 x i8> %tmp3
 }
 
+; CHECK-LABEL: .LCPI8_0:
+; CHECK-NEXT:    .byte 0 // 0x0
+; CHECK-NEXT:    .byte 4 // 0x4
+; CHECK-NEXT:    .byte 8 // 0x8
+; CHECK-NEXT:    .byte 12 // 0xc
+; CHECK-NEXT:    .byte 16 // 0x10
+; CHECK-NEXT:    .byte 20 // 0x14
+; CHECK-NEXT:    .byte 24 // 0x18
+; CHECK-NEXT:    .byte 28 // 0x1c
+; CHECK-NEXT:    .byte 255 // 0xff
+; CHECK-NEXT:    .byte 255 // 0xff
+; CHECK-NEXT:    .byte 255 // 0xff
+; CHECK-NEXT:    .byte 255 // 0xff
+; CHECK-NEXT:    .byte 255 // 0xff
+; CHECK-NEXT:    .byte 255 // 0xff
+; CHECK-NEXT:    .byte 255 // 0xff
+; CHECK-NEXT:    .byte 255 // 0xff
+
+define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: shuffled_tbl2_to_tbl4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI8_0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v4
+; CHECK-NEXT:    tbl.16b v1, { v2, v3 }, v4
+; CHECK-NEXT:    mov.d v0[1], v1[0]
+; CHECK-NEXT:    ret
+  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>) ; NOTE(review): both tbl2 masks were missing and are reconstructed from the .LCPI8_0 bytes asserted above; confirm against upstream
+  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> ; NOTE(review): mask was missing; reconstructed from the asserted mov.d v0[1], v1[0]; confirm against upstream
+  ret <16 x i8> %s
+}
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_shuffle:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI9_0
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT:    adrp x8, .LCPI9_1
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v4
+; CHECK-NEXT:    tbl.16b v1, { v2, v3 }, v4
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI9_1]
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    ret
+  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) ; NOTE(review): the constant mask operands of %t1, %t2 and %s are missing from this copy (apparently stripped); restore them from the upstream test before applying
+  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> )
+  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32>
+  ret <16 x i8> %s
+}
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI10_0
+; CHECK-NEXT:    adrp x9, .LCPI10_1
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT:    adrp x8, .LCPI10_2
+; CHECK-NEXT:    ldr q5, [x9, :lo12:.LCPI10_1]
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v4
+; CHECK-NEXT:    tbl.16b v1, { v2, v3 }, v5
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI10_2]
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    ret
+  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) ; NOTE(review): the constant mask operands of %t1, %t2 and %s are missing from this copy (apparently stripped); restore them from the upstream test before applying
+  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> )
+  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32>
+  ret <16 x i8> %s
+}
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: shuffled_tbl2_to_tbl4_incompatible_tbl2_mask2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI11_0
+; CHECK-NEXT:    adrp x9, .LCPI11_1
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    adrp x8, .LCPI11_2
+; CHECK-NEXT:    ldr q5, [x9, :lo12:.LCPI11_1]
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v4
+; CHECK-NEXT:    tbl.16b v1, { v2, v3 }, v5
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_2]
+; CHECK-NEXT:    tbl.16b v0, { v0, v1 }, v2
+; CHECK-NEXT:    ret
+  %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> ) ; NOTE(review): the constant mask operands of %t1, %t2 and %s are missing from this copy (apparently stripped); restore them from the upstream test before applying
+  %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> )
+  %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32>
+  ret <16 x i8> %s
+}
+
 declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
 declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
@@ -66,57 +205,91 @@ declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <
 declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
 
 define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
-; CHECK: tbx1_8b
-; CHECK: tbx.8b
+; CHECK-LABEL: tbx1_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.8b v0, { v1 }, v2
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
-; CHECK: tbx1_16b
-; CHECK: tbx.16b
+; CHECK-LABEL: tbx1_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    tbx.16b v0, { v1 }, v2
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
-; CHECK: tbx2_8b
-; CHECK: tbx.8b
+; CHECK-LABEL: tbx2_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-NEXT:    tbx.8b v0, { v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
-; CHECK: tbx2_16b
-; CHECK: tbx.16b
+; CHECK-LABEL: tbx2_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2
+; CHECK-NEXT:    tbx.16b v0, { v1, v2 }, v3
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
-; CHECK: tbx3_8b
-; CHECK: tbx.8b
+; CHECK-LABEL: tbx3_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-NEXT:    tbx.8b v0, { v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
-; CHECK: tbx3_16b
-; CHECK: tbx.16b
+; CHECK-LABEL: tbx3_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3
+; CHECK-NEXT:    tbx.16b v0, { v1, v2, v3 }, v4
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
   ret <16 x i8> %tmp3
 }
 
 define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
-; CHECK: tbx4_8b
-; CHECK: tbx.8b
+; CHECK-LABEL: tbx4_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    tbx.8b v0, { v1, v2, v3, v4 }, v5
+; CHECK-NEXT:    ret
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
   ret <8 x i8> %tmp3
 }
 
 define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
-; CHECK: tbx4_16b
-; CHECK: tbx.16b
+; CHECK-LABEL: tbx4_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4
+; CHECK-NEXT:    tbx.16b v0, { v1, v2, v3, v4 }, v5
+; CHECK-NEXT:    ret
   %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
   ret <16 x i8> %tmp3
 }