[AVX-512] Remove patterns that select vmovdqu8/16 for unmasked loads. Prefer vmovdqa64/vmovdqu64 instead.

These were taking priority over the aligned load instructions since there is no vmovdqa8/16. I don't think there is really a difference between aligned and unaligned on newer CPUs, so I don't think it matters which instructions we use.
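
As a minimal sketch (this function is illustrative and not one of the tests changed below): a fully aligned byte-vector load like the one here used to match the bitcast-rooted VMOVDQU8 pattern even though the aligned instruction is legal for it.

define <64 x i8> @aligned_load_v64i8(<64 x i8>* %p) {
  ; 64-byte alignment makes the aligned instruction legal, but before this
  ; change isel still picked vmovdqu8; afterwards it selects vmovdqa64.
  %v = load <64 x i8>, <64 x i8>* %p, align 64
  ret <64 x i8> %v
}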

But with this change we reduce the size of the isel table a little, and we allow the alignment information to pass through to the EVEX->VEX pass so that it produces the same output as AVX/AVX2 in some cases.
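
For example (a hedged sketch assuming an AVX512VL+BW target; the function name is made up), a 32-byte-aligned 256-bit load of byte elements can now come out as the plain VEX "vmovdqa" that AVX2 would produce, instead of the unaligned "vmovdqu":

define <32 x i8> @aligned_load_v32i8(<32 x i8>* %p) {
  ; Selecting vmovdqa64 here lets the EVEX->VEX pass compress the load to
  ; vmovdqa; the old vmovdqu8 pattern matched regardless of alignment and
  ; ended up as vmovdqu.
  %v = load <32 x i8>, <32 x i8>* %p, align 32
  ret <32 x i8> %v
}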

I also generally dislike patterns rooted in a bitcast, which these were.

Differential Revision: https://reviews.llvm.org/D35977

llvm-svn: 309589
topperc committed Jul 31, 2017
1 parent ed99e4c commit cb0e749
Showing 38 changed files with 326 additions and 460 deletions.
29 changes: 18 additions & 11 deletions llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3249,6 +3249,7 @@ defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;

multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag ld_frag, PatFrag mload,
bit NoRMPattern = 0,
SDPatternOperator SelectOprr = vselect> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
@@ -3263,11 +3264,13 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
_.ImmAllZerosV)))], _.ExeDomain>,
EVEX, EVEX_KZ;

let canFoldAsLoad = 1, isReMaterializable = 1,
let mayLoad = 1, canFoldAsLoad = 1, isReMaterializable = 1,
SchedRW = [WriteLoad] in
def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
!if(NoRMPattern, [],
[(set _.RC:$dst,
(_.VT (bitconvert (ld_frag addr:$src))))]),
_.ExeDomain>, EVEX;

let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
@@ -3327,16 +3330,20 @@ multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
Predicate prd,
bit NoRMPattern = 0,
SDPatternOperator SelectOprr = vselect> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V512;
masked_load_unaligned, NoRMPattern,
SelectOprr>, EVEX_V512;

let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V256;
masked_load_unaligned, NoRMPattern,
SelectOprr>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
masked_load_unaligned, SelectOprr>, EVEX_V128;
masked_load_unaligned, NoRMPattern,
SelectOprr>, EVEX_V128;
}
}

@@ -3416,13 +3423,13 @@ defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
null_frag>,
0, null_frag>,
avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512,
"VMOVUPS">,
PS, EVEX_CD8<32, CD8VF>;

defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
null_frag>,
0, null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
"VMOVUPD">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -3439,24 +3446,24 @@ defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, "VMOVDQA64">,
PD, VEX_W, EVEX_CD8<64, CD8VF>;

defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>,
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI, 1>,
avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info,
HasBWI, "VMOVDQU8">,
XD, EVEX_CD8<8, CD8VF>;

defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI, 1>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info,
HasBWI, "VMOVDQU16">,
XD, VEX_W, EVEX_CD8<16, CD8VF>;

defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
null_frag>,
0, null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512, "VMOVDQU32">,
XS, EVEX_CD8<32, CD8VF>;

defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
null_frag>,
0, null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
HasAVX512, "VMOVDQU64">,
XS, VEX_W, EVEX_CD8<64, CD8VF>;
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/X86/avg.ll
@@ -710,7 +710,7 @@ define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) {
;
; AVX512BW-LABEL: avg_v64i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
; AVX512BW-NEXT: vpavgb (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
@@ -1099,7 +1099,7 @@ define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) {
;
; AVX512BW-LABEL: avg_v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rsi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
; AVX512BW-NEXT: vpavgw (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
@@ -1732,7 +1732,7 @@ define void @avg_v64i8_2(<64 x i8>* %a, <64 x i8>* %b) {
;
; AVX512BW-LABEL: avg_v64i8_2:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rsi), %zmm0
; AVX512BW-NEXT: vpavgb %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
@@ -2122,7 +2122,7 @@ define void @avg_v32i16_2(<32 x i16>* %a, <32 x i16>* %b) {
;
; AVX512BW-LABEL: avg_v32i16_2:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpavgw (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
@@ -2647,7 +2647,7 @@ define void @avg_v64i8_const(<64 x i8>* %a) {
;
; AVX512BW-LABEL: avg_v64i8_const:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpavgb {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
@@ -2955,7 +2955,7 @@ define void @avg_v32i16_const(<32 x i16>* %a) {
;
; AVX512BW-LABEL: avg_v32i16_const:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpavgw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rax)
; AVX512BW-NEXT: vzeroupper
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -763,7 +763,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(<32 x i8>* %ptr, <32 x i8>
; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; AVX512VL-NEXT: vmovdqu (%eax), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x08]
; AVX512VL-NEXT: vmovdqa (%eax), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; AVX512VL-NEXT: vpmaddubsw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; AVX512VL-NEXT: retl ## encoding: [0xc3]
%a0 = load <32 x i8>, <32 x i8>* %ptr
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1295,7 +1295,7 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32>
; SKX-NEXT: vpmovm2w %k0, %zmm0
; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpmovm2w %k0, %zmm1
; SKX-NEXT: vmovdqu16 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,32,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; SKX-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
; SKX-NEXT: vpmovw2m %zmm2, %k0
; SKX-NEXT: kmovd %k0, %eax
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -53,7 +53,7 @@ declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_w_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512BW-NEXT: kmovd %edx, %k1
; AVX512BW-NEXT: vmovdqu16 (%rsi), %zmm0 {%k1}
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1} {z}
@@ -64,7 +64,7 @@ define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: vmovdqu16 (%ecx), %zmm0
; AVX512F-32-NEXT: vmovdqu64 (%ecx), %zmm0
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovdqu16 (%eax), %zmm0 {%k1}
; AVX512F-32-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z}
@@ -82,7 +82,7 @@ declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)
define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_b_512:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm0
; AVX512BW-NEXT: kmovq %rdx, %k1
; AVX512BW-NEXT: vmovdqu8 (%rsi), %zmm0 {%k1}
; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} {z}
@@ -93,7 +93,7 @@ define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: vmovdqu8 (%ecx), %zmm0
; AVX512F-32-NEXT: vmovdqu64 (%ecx), %zmm0
; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
; AVX512F-32-NEXT: vmovdqu8 (%eax), %zmm0 {%k1}
; AVX512F-32-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1710,13 +1710,13 @@ define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16>
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; AVX512BW-NEXT: vpsravw {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vmovdqu16 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; AVX512F-32-NEXT: vpsravw {{\.LCPI.*}}, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9, i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512bw-mov.ll
@@ -4,7 +4,7 @@
define <64 x i8> @test1(i8 * %addr) {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm0
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%vaddr = bitcast i8* %addr to <64 x i8>*
%res = load <64 x i8>, <64 x i8>* %vaddr, align 1
@@ -52,7 +52,7 @@ define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
define <32 x i16> @test5(i8 * %addr) {
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm0
; CHECK-NEXT: vmovups (%rdi), %zmm0
; CHECK-NEXT: retq
%vaddr = bitcast i8* %addr to <32 x i16>*
%res = load <32 x i16>, <32 x i16>* %vaddr, align 1
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx512bwvl-mov.ll
@@ -4,7 +4,7 @@
define <32 x i8> @test_256_1(i8 * %addr) {
; CHECK-LABEL: test_256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <32 x i8>*
%res = load <32 x i8>, <32 x i8>* %vaddr, align 1
@@ -52,7 +52,7 @@ define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
define <16 x i16> @test_256_5(i8 * %addr) {
; CHECK-LABEL: test_256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
; CHECK-NEXT: vmovups (%rdi), %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i16>*
%res = load <16 x i16>, <16 x i16>* %vaddr, align 1
@@ -100,7 +100,7 @@ define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
define <16 x i8> @test_128_1(i8 * %addr) {
; CHECK-LABEL: test_128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <16 x i8>*
%res = load <16 x i8>, <16 x i8>* %vaddr, align 1
@@ -148,7 +148,7 @@ define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
define <8 x i16> @test_128_5(i8 * %addr) {
; CHECK-LABEL: test_128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vmovdqu (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
; CHECK-NEXT: vmovups (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%vaddr = bitcast i8* %addr to <8 x i16>*
%res = load <8 x i16>, <8 x i16>* %vaddr, align 1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/nontemporal-loads.ll
@@ -1750,7 +1750,7 @@ define <32 x i16> @test_unaligned_v32i16(<32 x i16>* %src) {
;
; AVX512BW-LABEL: test_unaligned_v32i16:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT: vmovups (%rdi), %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_unaligned_v32i16:
@@ -1785,7 +1785,7 @@ define <64 x i8> @test_unaligned_v64i8(<64 x i8>* %src) {
;
; AVX512BW-LABEL: test_unaligned_v64i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu8 (%rdi), %zmm0
; AVX512BW-NEXT: vmovups (%rdi), %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VL-LABEL: test_unaligned_v64i8:
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/pmul.ll
@@ -921,7 +921,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind {
; AVX512BW-LABEL: mul_v64i8c:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1
; AVX512BW-NEXT: vmovdqu16 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117]
; AVX512BW-NEXT: vpmullw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/sad.ll
@@ -814,7 +814,7 @@ define i32 @sad_avx64i8() nounwind {
; AVX512BW-NEXT: .p2align 4, 0x90
; AVX512BW-NEXT: .LBB2_1: # %vector.body
; AVX512BW-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512BW-NEXT: vmovdqu8 a+1024(%rax), %zmm2
; AVX512BW-NEXT: vmovdqa64 a+1024(%rax), %zmm2
; AVX512BW-NEXT: vpsadbw b+1024(%rax), %zmm2, %zmm2
; AVX512BW-NEXT: vpaddd %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: addq $4, %rax
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-128.ll
@@ -58,7 +58,7 @@ define void @shuffle_v16i8_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v16i8_to_v8i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu (%rdi), %xmm0
; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
; AVX512BWVL-NEXT: retq
%vec = load <16 x i8>, <16 x i8>* %L
@@ -113,7 +113,7 @@ define void @trunc_v8i16_to_v8i8(<16 x i8>* %L, <8 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: trunc_v8i16_to_v8i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu (%rdi), %xmm0
; AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
; AVX512BWVL-NEXT: retq
%vec = load <16 x i8>, <16 x i8>* %L
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
@@ -61,7 +61,7 @@ define void @shuffle_v32i8_to_v16i8(<32 x i8>* %L, <16 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: shuffle_v32i8_to_v16i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -122,7 +122,7 @@ define void @trunc_v16i16_to_v16i8(<32 x i8>* %L, <16 x i8>* %S) nounwind {
;
; AVX512BWVL-LABEL: trunc_v16i16_to_v16i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu (%rdi), %ymm0
; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
@@ -33,14 +33,14 @@ define void @shuffle_v64i8_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
;
; AVX512BW-LABEL: shuffle_v64i8_to_v32i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, (%rsi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: shuffle_v64i8_to_v32i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@@ -75,14 +75,14 @@ define void @trunc_v32i16_to_v32i8(<64 x i8>* %L, <32 x i8>* %S) nounwind {
;
; AVX512BW-LABEL: trunc_v32i16_to_v32i8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BW-NEXT: vpmovwb %zmm0, (%rsi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_v32i16_to_v32i8:
; AVX512BWVL: # BB#0:
; AVX512BWVL-NEXT: vmovdqu16 (%rdi), %zmm0
; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rsi)
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll
@@ -52,7 +52,7 @@ define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) {
; SKX: ## BB#0:
; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; SKX-NEXT: vmovdqu (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x00]
; SKX-NEXT: vmovdqa (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
; SKX-NEXT: movl $7, %eax ## encoding: [0xb8,0x07,0x00,0x00,0x00]
; SKX-NEXT: movl $7, %edx ## encoding: [0xba,0x07,0x00,0x00,0x00]
; SKX-NEXT: vpcmpestri $7, (%ecx), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x61,0x01,0x07]
@@ -298,7 +298,7 @@ define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
; SKX: ## BB#0:
; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; SKX-NEXT: vmovdqu (%ecx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
; SKX-NEXT: vmovdqa (%ecx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x01]
; SKX-NEXT: vpcmpistri $7, (%eax), %xmm0 ## encoding: [0xc4,0xe3,0x79,0x63,0x00,0x07]
; SKX-NEXT: movl %ecx, %eax ## encoding: [0x89,0xc8]
; SKX-NEXT: retl ## encoding: [0xc3]