diff --git a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
new file mode 100644
index 00000000000000..02692364d2be45
--- /dev/null
+++ b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
@@ -0,0 +1,242 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 | FileCheck %s --check-prefixes=CHECK,CHECK-SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-ICX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+
+
+define <4 x i32> @shuf_rot_v4i32_1032(<4 x i32> %x) {
+; CHECK-LABEL: shuf_rot_v4i32_1032:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-NEXT:    retq
+  %x1 = add <4 x i32> %x, %x
+  %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x i32> %r
+}
+
+define <8 x i32> @shuf_rot_v8i32_10325476(<8 x i32> %x) {
+; CHECK-LABEL: shuf_rot_v8i32_10325476:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; CHECK-NEXT:    retq
+  %x1 = add <8 x i32> %x, %x
+  %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i32> %r
+}
+
+define <16 x i32> @shuf_rot_v16i32_1032547698111013121514(<16 x i32> %x) {
+; CHECK-LABEL: shuf_rot_v16i32_1032547698111013121514:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT:    retq
+  %x1 = add <16 x i32> %x, %x
+  %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  ret <16 x i32> %r
+}
+
+define <8 x i16> @shuf_rot_v8i16_10325476(<8 x i16> %x) {
+; CHECK-LABEL: shuf_rot_v8i16_10325476:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vprold $16, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %x1 = add <8 x i16> %x, %x
+  %r = shufflevector <8 x i16> %x1, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i16> %r
+}
+
+define <16 x i16> @shuf_rot_v16i16_1032547698111013121514(<16 x i16> %x) {
+; CHECK-LABEL: shuf_rot_v16i16_1032547698111013121514:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vprold $16, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %x1 = add <16 x i16> %x, %x
+  %r = shufflevector <16 x i16> %x1, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  ret <16 x i16> %r
+}
+
+define <32 x i16> @shuf_rot_v32i16_1234056749101181314151217181916212223202527272429303128(<32 x i16> %x) {
+; CHECK-LABEL: shuf_rot_v32i16_1234056749101181314151217181916212223202527272429303128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vprolq $48, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %x1 = add <32 x i16> %x, %x
+  %r = shufflevector <32 x i16> %x1, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4, i32 9, i32 10, i32 11, i32 8, i32 13, i32 14, i32 15, i32 12, i32 17, i32 18, i32 19, i32 16, i32 21, i32 22, i32 23, i32 20, i32 25, i32 26, i32 27, i32 24, i32 29, i32 30, i32 31, i32 28>
+  ret <32 x i16> %r
+}
+
+define <16 x i8> @shuf_rot_v16i8_2301674510118914151213(<16 x i8> %x) {
+; CHECK-LABEL: shuf_rot_v16i8_2301674510118914151213:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vprold $16, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %x1 = add <16 x i8> %x, %x
+  %r = shufflevector <16 x i8> %x1, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+  ret <16 x i8> %r
+}
+
+define <32 x i8> @shuf_rot_v32i8_230167451011891415121318191617222320212627242530312829(<32 x i8> %x) {
+; CHECK-LABEL: shuf_rot_v32i8_230167451011891415121318191617222320212627242530312829:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vprold $16, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %x1 = add <32 x i8> %x, %x
+  %r = shufflevector <32 x i8> %x1, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13, i32 18, i32 19, i32 16, i32 17, i32 22, i32 23, i32 20, i32 21, i32 26, i32 27, i32 24, i32 25, i32 30, i32 31, i32 28, i32 29>
+  ret <32 x i8> %r
+}
+
+define <64 x i8> @shuf_rot_v64i8_3012745611891015121314191617182320212227242526312829303532333439363738434041424744454651484950555253545956575863606162(<64 x i8> %x) {
+; CHECK-LABEL: shuf_rot_v64i8_3012745611891015121314191617182320212227242526312829303532333439363738434041424744454651484950555253545956575863606162:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vprold $8, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %x1 = add <64 x i8> %x, %x
+  %r = shufflevector <64 x i8> %x1, <64 x i8> zeroinitializer, <64 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 11, i32 8, i32 9, i32 10, i32 15, i32 12, i32 13, i32 14, i32 19, i32 16, i32 17, i32 18, i32 23, i32 20, i32 21, i32 22, i32 27, i32 24, i32 25, i32 26, i32 31, i32 28, i32 29, i32 30, i32 35, i32 32, i32 33, i32 34, i32 39, i32 36, i32 37, i32 38, i32 43, i32 40, i32 41, i32 42, i32 47, i32 44, i32 45, i32 46, i32 51, i32 48, i32 49, i32 50, i32 55, i32 52, i32 53, i32 54, i32 59, i32 56, i32 57, i32 58, i32 63, i32 60, i32 61, i32 62>
+  ret <64 x i8> %r
+}
+
+define <4 x i32> @shuf_shr_v4i32_1U3U(<4 x i32> %x) {
+; CHECK-LABEL: shuf_shr_v4i32_1U3U:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT:    retq
+  %x1 = add <4 x i32> %x, %x
+  %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 poison, i32 3, i32 poison>
+  ret <4 x i32> %r
+}
+
+define <8 x i32> @shuf_shr_v8i32_1U3U5U7U(<8 x i32> %x) {
+; CHECK-LABEL: shuf_shr_v8i32_1U3U5U7U:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-NEXT:    retq
+  %x1 = add <8 x i32> %x, %x
+  %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 poison, i32 3, i32 poison, i32 5, i32 poison, i32 7, i32 poison>
+  ret <8 x i32> %r
+}
+
+define <16 x i32> @shuf_shr_v16i32_U3U5U7U9U11U13U15(<16 x i32> %x) {
+; CHECK-LABEL: shuf_shr_v16i32_U3U5U7U9U11U13U15:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT:    retq
+  %x1 = add <16 x i32> %x, %x
+  %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 poison, i32 3, i32 poison, i32 5, i32 poison, i32 7, i32 poison, i32 9, i32 poison, i32 11, i32 poison, i32 13, i32 poison, i32 15, i32 poison>
+  ret <16 x i32> %r
+}
+
+define <8 x i16> @shuf_shr_v8i16_123U567U(<8 x i16> %x) {
+; CHECK-LABEL: shuf_shr_v8i16_123U567U:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrlq $16, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %x1 = add <8 x i16> %x, %x
+  %r = shufflevector <8 x i16> %x1, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 poison, i32 5, i32 6, i32 7, i32 poison>
+  ret <8 x i16> %r
+}
+
+define <32 x i16> @shuf_shr_v32i16_1U3U5U7U9U11U13U15U17U19U21U23U25U27U29U31U(<32 x i16> %x) {
+; CHECK-LABEL: shuf_shr_v32i16_1U3U5U7U9U11U13U15U17U19U21U23U25U27U29U31U:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vpsrld $16, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %x1 = add <32 x i16> %x, %x
+  %r = shufflevector <32 x i16> %x1, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 poison, i32 3, i32 poison, i32 5, i32 poison, i32 7, i32 poison, i32 9, i32 poison, i32 11, i32 poison, i32 13, i32 poison, i32 15, i32 poison, i32 17, i32 poison, i32 19, i32 poison, i32 21, i32 poison, i32 23, i32 poison, i32 25, i32 poison, i32 27, i32 poison, i32 29, i32 poison, i32 31, i32 poison>
+  ret <32 x i16> %r
+}
+
+define <32 x i8> @shuf_shr_v32i8_1U3U5U7U9U11U13U15U17U19U21U23U25U27U29U31U(<32 x i8> %x) {
+; CHECK-LABEL: shuf_shr_v32i8_1U3U5U7U9U11U13U15U17U19U21U23U25U27U29U31U:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %x1 = add <32 x i8> %x, %x
+  %r = shufflevector <32 x i8> %x1, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 poison, i32 3, i32 poison, i32 5, i32 poison, i32 7, i32 poison, i32 9, i32 poison, i32 11, i32 poison, i32 13, i32 poison, i32 15, i32 poison, i32 17, i32 poison, i32 19, i32 poison, i32 21, i32 poison, i32 23, i32 poison, i32 25, i32 poison, i32 27, i32 poison, i32 29, i32 poison, i32 31, i32 poison>
+  ret <32 x i8> %r
+}
+
+define <4 x i32> @shuf_shl_v4i32_U0U2(<4 x i32> %x) {
+; CHECK-LABEL: shuf_shl_v4i32_U0U2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT:    retq
+  %x1 = add <4 x i32> %x, %x
+  %r = shufflevector <4 x i32> %x1, <4 x i32> zeroinitializer, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
+  ret <4 x i32> %r
+}
+
+define <8 x i32> @shuf_shl_v8i32_U0U2U4U6(<8 x i32> %x) {
+; CHECK-LABEL: shuf_shl_v8i32_U0U2U4U6:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT:    retq
+  %x1 = add <8 x i32> %x, %x
+  %r = shufflevector <8 x i32> %x1, <8 x i32> zeroinitializer, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 poison, i32 6>
+  ret <8 x i32> %r
+}
+
+define <16 x i32> @shuf_shl_v16i32_U0U2U4U6U8U10U12U14(<16 x i32> %x) {
+; CHECK-LABEL: shuf_shl_v16i32_U0U2U4U6U8U10U12U14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddd %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT:    retq
+  %x1 = add <16 x i32> %x, %x
+  %r = shufflevector <16 x i32> %x1, <16 x i32> zeroinitializer, <16 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 poison, i32 6, i32 poison, i32 8, i32 poison, i32 10, i32 poison, i32 12, i32 poison, i32 14>
+  ret <16 x i32> %r
+}
+
+define <16 x i16> @shuf_shl_v16i16_U0U2U4U6U8U10U12U14(<16 x i16> %x) {
+; CHECK-LABEL: shuf_shl_v16i16_U0U2U4U6U8U10U12U14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddw %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vpslld $16, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %x1 = add <16 x i16> %x, %x
+  %r = shufflevector <16 x i16> %x1, <16 x i16> zeroinitializer, <16 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 poison, i32 6, i32 poison, i32 8, i32 poison, i32 10, i32 poison, i32 12, i32 poison, i32 14>
+  ret <16 x i16> %r
+}
+
+define <16 x i8> @shuf_shl_v16i8_U0U2U4U6U8U10U12U14(<16 x i8> %x) {
+; CHECK-LABEL: shuf_shl_v16i8_U0U2U4U6U8U10U12U14:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vpsllw $8, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %x1 = add <16 x i8> %x, %x
+  %r = shufflevector <16 x i8> %x1, <16 x i8> zeroinitializer, <16 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 poison, i32 6, i32 poison, i32 8, i32 poison, i32 10, i32 poison, i32 12, i32 poison, i32 14>
+  ret <16 x i8> %r
+}
+
+define <64 x i8> @shuf_shl_v64i8_U0U2U4U6U8U10U12U14U16U18U20U22U24U26U28U30U32U34U36U38U40U42U44U46U48U50U52U54U56U58U60U62(<64 x i8> %x) {
+; CHECK-LABEL: shuf_shl_v64i8_U0U2U4U6U8U10U12U14U16U18U20U22U24U26U28U30U32U34U36U38U40U42U44U46U48U50U52U54U56U58U60U62:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpaddb %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vpsllw $8, %zmm0, %zmm0
+; CHECK-NEXT:    retq
+  %x1 = add <64 x i8> %x, %x
+  %r = shufflevector <64 x i8> %x1, <64 x i8> zeroinitializer, <64 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 4, i32 poison, i32 6, i32 poison, i32 8, i32 poison, i32 10, i32 poison, i32 12, i32 poison, i32 14, i32 poison, i32 16, i32 poison, i32 18, i32 poison, i32 20, i32 poison, i32 22, i32 poison, i32 24, i32 poison, i32 26, i32 poison, i32 28, i32 poison, i32 30, i32 poison, i32 32, i32 poison, i32 34, i32 poison, i32 36, i32 poison, i32 38, i32 poison, i32 40, i32 poison, i32 42, i32 poison, i32 44, i32 poison, i32 46, i32 poison, i32 48, i32 poison, i32 50, i32 poison, i32 52, i32 poison, i32 54, i32 poison, i32 56, i32 poison, i32 58, i32 poison, i32 60, i32 poison, i32 62>
+  ret <64 x i8> %r
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-ICX: {{.*}}
+; CHECK-SKX: {{.*}}
+; CHECK-V4: {{.*}}
+; CHECK-ZNVER4: {{.*}}