llvm/test/CodeGen/X86/vector-rotate-512.ll (48 changes: 24 additions & 24 deletions)
@@ -135,15 +135,15 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm3
; AVX512F-NEXT: vpsllw $4, %ymm2, %ymm4
-; AVX512F-NEXT:    vpbroadcastq {{.*#+}} zmm5 = [17361641481138401520,17361641481138401520,17361641481138401520,17361641481138401520,17361641481138401520,17361641481138401520,17361641481138401520,17361641481138401520]
-; AVX512F-NEXT:    vpternlogq $226, %zmm3, %zmm5, %zmm4
+; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm5 = [4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160]
+; AVX512F-NEXT:    vpternlogd $226, %zmm3, %zmm5, %zmm4
; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512F-NEXT: vpsllw $5, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $6, %ymm2, %ymm4
; AVX512F-NEXT: vpsllw $2, %ymm2, %ymm6
-; AVX512F-NEXT:    vpbroadcastq {{.*#+}} zmm7 = [18229723555195321596,18229723555195321596,18229723555195321596,18229723555195321596,18229723555195321596,18229723555195321596,18229723555195321596,18229723555195321596]
-; AVX512F-NEXT:    vpternlogq $226, %zmm4, %zmm7, %zmm6
+; AVX512F-NEXT:    vpbroadcastd {{.*#+}} zmm7 = [4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268]
+; AVX512F-NEXT:    vpternlogd $226, %zmm4, %zmm7, %zmm6
; AVX512F-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512F-NEXT: vpblendvb %ymm3, %ymm6, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $7, %ymm2, %ymm4
@@ -155,12 +155,12 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512F-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm3
; AVX512F-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512F-NEXT:    vpternlogq $226, %zmm3, %zmm5, %zmm4
+; AVX512F-NEXT:    vpternlogd $226, %zmm3, %zmm5, %zmm4
; AVX512F-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $6, %ymm0, %ymm3
; AVX512F-NEXT: vpsllw $2, %ymm0, %ymm4
-; AVX512F-NEXT:    vpternlogq $226, %zmm3, %zmm7, %zmm4
+; AVX512F-NEXT:    vpternlogd $226, %zmm3, %zmm7, %zmm4
; AVX512F-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm3
@@ -177,15 +177,15 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm2, %ymm4
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} ymm5 = [17361641481138401520,17361641481138401520,17361641481138401520,17361641481138401520]
-; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm5, %ymm4
+; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} ymm5 = [4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160,4042322160]
+; AVX512VL-NEXT:    vpternlogd $226, %ymm3, %ymm5, %ymm4
; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $6, %ymm2, %ymm4
; AVX512VL-NEXT: vpsllw $2, %ymm2, %ymm6
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} ymm7 = [18229723555195321596,18229723555195321596,18229723555195321596,18229723555195321596]
-; AVX512VL-NEXT:    vpternlogq $226, %ymm4, %ymm7, %ymm6
+; AVX512VL-NEXT:    vpbroadcastd {{.*#+}} ymm7 = [4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268,4244438268]
+; AVX512VL-NEXT:    vpternlogd $226, %ymm4, %ymm7, %ymm6
; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm3
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $7, %ymm2, %ymm4
@@ -196,12 +196,12 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
; AVX512VL-NEXT: vpblendvb %ymm3, %ymm6, %ymm2, %ymm2
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm5, %ymm4
+; AVX512VL-NEXT:    vpternlogd $226, %ymm3, %ymm5, %ymm4
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm7, %ymm4
+; AVX512VL-NEXT:    vpternlogd $226, %ymm3, %ymm7, %ymm4
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3
@@ -754,7 +754,7 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512F-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: splatconstant_rotate_v64i8:
@@ -766,35 +766,35 @@ define <64 x i8> @splatconstant_rotate_v64i8(<64 x i8> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VL-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VBMI2-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
%lshr = lshr <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
@@ -902,7 +902,7 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512F-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512F-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
@@ -915,39 +915,39 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind {
; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
; AVX512VL-NEXT: vpsrlw $4, %ymm2, %ymm2
; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VL-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512BW-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VLBW-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VLBW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VBMI2: # %bb.0:
; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VBMI2-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_rotate_mask_v64i8:
; AVX512VLVBMI2: # %bb.0:
; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm1
; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; AVX512VLVBMI2-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VLVBMI2-NEXT: retq
%shl = shl <64 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
llvm/test/CodeGen/X86/vector-shuffle-v192.ll (4 changes: 2 additions & 2 deletions)
@@ -230,7 +230,7 @@ define <64 x i8> @f2(ptr %p0) {
; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm0
+; AVX512F-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: f2:
@@ -517,7 +517,7 @@ define <64 x i8> @f4(ptr %p0) {
; AVX512F-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
-; AVX512F-NEXT:    vpternlogq $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm0
+; AVX512F-NEXT:    vpternlogd $216, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: f4: