diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
index 3c95f4ce400e7..30574e4d4cdf4 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -590,6 +590,73 @@ define <64 x i8> @shuffle_v64i8_ashr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_7
   ret <64 x i8> %5
 }
 
+define <64 x i8> @shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind {
+; AVX512F-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpsrad $25, %zmm0, %zmm0
+; AVX512F-NEXT:    vpsrad $25, %zmm1, %zmm1
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12>
+; AVX512F-NEXT:    vpshufb %ymm4, %ymm3, %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u>
+; AVX512F-NEXT:    vpshufb %ymm5, %ymm2, %ymm2
+; AVX512F-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
+; AVX512F-NEXT:    vpshufb %ymm4, %ymm1, %ymm1
+; AVX512F-NEXT:    vpshufb %ymm5, %ymm0, %ymm0
+; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsrad $25, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsrad $25, %zmm1, %zmm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = <u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12>
+; AVX512BW-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u>
+; AVX512BW-NEXT:    vpshufb %ymm5, %ymm4, %ymm4
+; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm2 = ymm4[0],ymm2[1],ymm4[2],ymm2[3],ymm4[4],ymm2[5],ymm4[6],ymm2[7]
+; AVX512BW-NEXT:    vpshufb %ymm3, %ymm1, %ymm1
+; AVX512BW-NEXT:    vpshufb %ymm5, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsrad $25, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpsrad $25, %zmm1, %zmm1
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12>
+; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u>
+; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
+; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VBMI-LABEL: shuffle_v64i8_ashr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512VBMI:       # %bb.0:
+; AVX512VBMI-NEXT:    vpsrad $25, %zmm0, %zmm2
+; AVX512VBMI-NEXT:    vpsrad $25, %zmm1, %zmm1
+; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [0,4,8,12,64,68,72,76,0,4,8,12,64,68,72,76,16,20,24,28,80,84,88,92,16,20,24,28,80,84,88,92,32,36,40,44,96,100,104,108,32,36,40,44,96,100,104,108,48,52,56,60,112,116,120,124,48,52,56,60,112,116,120,124]
+; AVX512VBMI-NEXT:    vpermi2b %zmm1, %zmm2, %zmm0
+; AVX512VBMI-NEXT:    retq
+  %1 = ashr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
+  %2 = ashr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
+  %3 = bitcast <16 x i32> %1 to <64 x i8>
+  %4 = bitcast <16 x i32> %2 to <64 x i8>
+  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
+  ret <64 x i8> %5
+}
+
 define <64 x i8> @shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125(<16 x i32> %a0, <16 x i32> %a1) nounwind {
 ; AVX512F-LABEL: shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_76_77_16_17_20_21_24_25_28_29_80_81_84_85_88_89_92_93_32_33_36_37_40_41_44_45_96_97_100_101_104_105_108_109_48_49_52_53_56_57_60_61_112_113_116_117_120_121_124_125:
 ; AVX512F:       # %bb.0:
@@ -634,6 +701,73 @@ define <64 x i8> @shuffle_v64i8_lshr_00_01_04_05_08_09_12_13_64_65_68_69_72_73_7
   ret <64 x i8> %5
 }
 
+define <64 x i8> @shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124(<16 x i32> %a0, <16 x i32> %a1) nounwind {
+; AVX512F-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpsrld $25, %zmm0, %zmm0
+; AVX512F-NEXT:    vpsrld $25, %zmm1, %zmm1
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12>
+; AVX512F-NEXT:    vpshufb %ymm4, %ymm3, %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u>
+; AVX512F-NEXT:    vpshufb %ymm5, %ymm2, %ymm2
+; AVX512F-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
+; AVX512F-NEXT:    vpshufb %ymm4, %ymm1, %ymm1
+; AVX512F-NEXT:    vpshufb %ymm5, %ymm0, %ymm0
+; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpsrld $25, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsrld $25, %zmm1, %zmm1
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = <u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12>
+; AVX512BW-NEXT:    vpshufb %ymm3, %ymm2, %ymm2
+; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
+; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u>
+; AVX512BW-NEXT:    vpshufb %ymm5, %ymm4, %ymm4
+; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm2 = ymm4[0],ymm2[1],ymm4[2],ymm2[3],ymm4[4],ymm2[5],ymm4[6],ymm2[7]
+; AVX512BW-NEXT:    vpshufb %ymm3, %ymm1, %ymm1
+; AVX512BW-NEXT:    vpshufb %ymm5, %ymm0, %ymm0
+; AVX512BW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vpsrld $25, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpsrld $25, %zmm1, %zmm1
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
+; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm4 = <u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12>
+; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm3, %ymm3
+; AVX512DQ-NEXT:    vmovdqa {{.*#+}} ymm5 = <0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u,0,4,8,12,u,u,u,u>
+; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3],ymm2[4],ymm3[5],ymm2[6],ymm3[7]
+; AVX512DQ-NEXT:    vpshufb %ymm4, %ymm1, %ymm1
+; AVX512DQ-NEXT:    vpshufb %ymm5, %ymm0, %ymm0
+; AVX512DQ-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT:    retq
+;
+; AVX512VBMI-LABEL: shuffle_v64i8_lshr_00_04_08_12_64_68_72_76_00_04_08_12_64_68_72_76_16_20_24_28_80_84_88_92_16_20_24_28_80_84_88_92_32_36_40_44_96_100_104_108_32_36_40_44_96_100_104_108_48_52_56_60_112_116_120_124_48_52_56_60_112_116_120_124:
+; AVX512VBMI:       # %bb.0:
+; AVX512VBMI-NEXT:    vpsrld $25, %zmm0, %zmm2
+; AVX512VBMI-NEXT:    vpsrld $25, %zmm1, %zmm1
+; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [0,4,8,12,64,68,72,76,0,4,8,12,64,68,72,76,16,20,24,28,80,84,88,92,16,20,24,28,80,84,88,92,32,36,40,44,96,100,104,108,32,36,40,44,96,100,104,108,48,52,56,60,112,116,120,124,48,52,56,60,112,116,120,124]
+; AVX512VBMI-NEXT:    vpermi2b %zmm1, %zmm2, %zmm0
+; AVX512VBMI-NEXT:    retq
+  %1 = lshr <16 x i32> %a0, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
+  %2 = lshr <16 x i32> %a1, <i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25, i32 25>
+  %3 = bitcast <16 x i32> %1 to <64 x i8>
+  %4 = bitcast <16 x i32> %2 to <64 x i8>
+  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 0, i32 4, i32 8, i32 12, i32 64, i32 68, i32 72, i32 76, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 16, i32 20, i32 24, i32 28, i32 80, i32 84, i32 88, i32 92, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 32, i32 36, i32 40, i32 44, i32 96, i32 100, i32 104, i32 108, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124, i32 48, i32 52, i32 56, i32 60, i32 112, i32 116, i32 120, i32 124>
+  ret <64 x i8> %5
+}
+
 define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126(<32 x i16> %a0, <32 x i16> %a1) {
 ; AVX512F-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
 ; AVX512F:       # %bb.0: