diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp index db692341617748..16bff201dd0389 100644 --- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp +++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp @@ -393,12 +393,12 @@ MachineInstr *FixupBWInstPass::tryReplaceInstr(MachineInstr *MI, switch (MI->getOpcode()) { case X86::MOV8rm: - // Replace 8-bit loads with the zero-extending version if not optimizing - // for size. The extending op is cheaper across a wide range of uarch and - // it avoids a potentially expensive partial register stall. It takes an - // extra byte to encode, however, so don't do this when optimizing for size. - if (!OptForSize) - return tryReplaceLoad(X86::MOVZX32rm8, MI); + // Only replace 8 bit loads with the zero extending versions if + // in an inner most loop and not optimizing for size. This takes + // an extra byte to encode, and provides limited performance upside. + if (MachineLoop *ML = MLI->getLoopFor(&MBB)) + if (ML->begin() == ML->end() && !OptForSize) + return tryReplaceLoad(X86::MOVZX32rm8, MI); break; case X86::MOV16rm: diff --git a/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll index 6c4254314009f8..54ccbf504a5c3a 100644 --- a/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll +++ b/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll @@ -11,7 +11,7 @@ target triple = "i686-unknown-unknown" define i32 @test5(i32 %B, i8 %C) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl A, %eax ; CHECK-NEXT: shldl %cl, %edx, %eax diff --git a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll index 16ef67724f883d..61f97a0224c75d 100644 --- a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll @@ -10,7 +10,7 @@ define void @test() { ; CHECK: # %bb.0: ; CHECK-NEXT: movl A, %eax ; CHECK-NEXT: movzwl 2(%eax), %eax -; CHECK-NEXT: movzbl B, %ecx +; CHECK-NEXT: movb B, %cl ; CHECK-NEXT: movl C, %edx ; CHECK-NEXT: andb $16, %cl ; CHECK-NEXT: shll %cl, %edx diff --git a/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll index 313fbbac05fd0f..1fbba1639b9636 100644 --- a/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll +++ b/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll @@ -9,8 +9,8 @@ define void @handle_vector_size_attribute() nounwind { ; CHECK-NEXT: cmpl $1, %eax ; CHECK-NEXT: ja .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb77 -; CHECK-NEXT: movzbl 0, %eax -; CHECK-NEXT: movzbl 0, %eax +; CHECK-NEXT: movb 0, %al +; CHECK-NEXT: movb 0, %al ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: .LBB0_2: # %bb84 diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll index b0b8771a7d5125..3c119c2f09dac6 100644 --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -69,11 +69,11 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %rbp ; CHECK-NEXT: movq (%rbp), %rax ; CHECK-NEXT: callq *216(%rax) -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl ; CHECK-NEXT: testb %dl, %dl ; CHECK-NEXT: je LBB0_11 ; CHECK-NEXT: ## %bb.7: ## %cond_false.i -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-NEXT: movzbl %bl, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divb %dl @@ -98,8 +98,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: LBB0_11: ## %cond_true.i ; CHECK-NEXT: movl $4, %edi ; CHECK-NEXT: callq _feraiseexcept -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-NEXT: xorl %r14d, %r14d ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: je LBB0_14 diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll index 6d596195fe7f69..3a700db4b67210 100644 --- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll @@ -31,7 +31,7 @@ define void @_ZNK10wxDateTime6FormatEPKwRKNS_8TimeZoneE(ptr noalias sret(%struct ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl ; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: je LBB0_25 ; CHECK-NEXT: ## %bb.1: ## %bb116.i diff --git a/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll b/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll index 50e2aac794a85d..92cebeeccb3e7a 100644 --- a/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll +++ b/llvm/test/CodeGen/X86/2008-04-24-MemCpyBug.ll @@ -17,7 +17,7 @@ define void @testit63_entry_2E_ce() nounwind { ; CHECK-NEXT: movl %esp, %edi ; CHECK-NEXT: movl $g1s63, %esi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) -; CHECK-NEXT: movzbl g1s63+62, %eax +; CHECK-NEXT: movb g1s63+62, %al ; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl g1s63+60, %eax ; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) diff --git a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll index 53175413980f10..816ae23d2fe26c 100644 --- a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll +++ b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll @@ -15,7 +15,7 @@ define i32 @func_44(i16 signext %p_46) nounwind { ; SOURCE-SCHED-NEXT: xorl %ecx, %ecx ; SOURCE-SCHED-NEXT: cmpl $2, %eax ; SOURCE-SCHED-NEXT: setge %cl -; SOURCE-SCHED-NEXT: movzbl g_73, %edx +; SOURCE-SCHED-NEXT: movb g_73, %dl ; SOURCE-SCHED-NEXT: xorl %eax, %eax ; SOURCE-SCHED-NEXT: subb {{[0-9]+}}(%esp), %al ; SOURCE-SCHED-NEXT: testb %dl, %dl diff --git a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll index f782c530d95664..ca8df1b6084aee 100644 --- a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll +++ b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll @@ -1,44 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-- -mcpu=core2 | FileCheck %s +; RUN: llc < %s -mcpu=core2 | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.4" declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind define fastcc i32 @cli_magic_scandesc(ptr %in) nounwind ssp { -; CHECK-LABEL: cli_magic_scandesc: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $72, %rsp -; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax -; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl (%rsp), %eax -; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK-NEXT: movq (%rdi), %rdx -; CHECK-NEXT: movq 8(%rdi), %rsi -; CHECK-NEXT: movq %rdx, (%rsp) -; CHECK-NEXT: movq 24(%rdi), %rdx -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq 16(%rdi), %rdx -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq 32(%rdi), %rdx -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq 40(%rdi), %rdx -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq 48(%rdi), %rdx -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq 56(%rdi), %rdx -; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %al, (%rsp) -; CHECK-NEXT: movb %cl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq __stack_chk_guard(%rip), %rax -; CHECK-NEXT: cmpq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: addq $72, %rsp -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_2: # %entry -; CHECK-NEXT: callq __stack_chk_fail@PLT entry: %a = alloca [64 x i8] %c = getelementptr inbounds [64 x i8], ptr %a, i64 0, i32 30 @@ -49,3 +15,10 @@ entry: store i8 %e, ptr %c, align 8 ret i32 0 } + +; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) +; CHECK: movb (%rsp), [[R1:%.+]] +; CHECK: movb 30(%rsp), [[R0:%.+]] +; CHECK: movb [[R1]], (%rsp) +; CHECK: movb [[R0]], 30(%rsp) +; CHECK: callq ___stack_chk_fail diff --git a/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll b/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll index 1a8d33f5b3480f..b8b3440f3aa54a 100644 --- a/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll +++ b/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll @@ -233,7 +233,7 @@ define i8 @neg_type_mismatch(i32 %a1_wide_orig, i16 %a2_wide_orig, i32 %inc) nou define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind { ; I386-NOCMOV-LABEL: negative_CopyFromReg: ; I386-NOCMOV: # %bb.0: -; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al ; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; I386-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx ; I386-NOCMOV-NEXT: cmpb %cl, %al @@ -255,7 +255,7 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw ; ; I686-NOCMOV-LABEL: negative_CopyFromReg: ; I686-NOCMOV: # %bb.0: -; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al ; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx ; I686-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx ; I686-NOCMOV-NEXT: cmpb %cl, %al @@ -297,8 +297,8 @@ define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounw define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind { ; I386-NOCMOV-LABEL: negative_CopyFromRegs: ; I386-NOCMOV: # %bb.0: -; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; I386-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl +; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al ; I386-NOCMOV-NEXT: cmpb %cl, %al ; I386-NOCMOV-NEXT: jg .LBB4_2 ; I386-NOCMOV-NEXT: # %bb.1: @@ -317,8 +317,8 @@ define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind { ; ; I686-NOCMOV-LABEL: negative_CopyFromRegs: ; I686-NOCMOV: # %bb.0: -; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; I686-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl +; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al ; I686-NOCMOV-NEXT: cmpb %cl, %al ; I686-NOCMOV-NEXT: jg .LBB4_2 ; I686-NOCMOV-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll index 33d4de16c97727..fe67dedff9cc37 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/callingconv.ll @@ -324,7 +324,7 @@ define void @test_abi_exts_call(ptr %addr) { ; X32-NEXT: .cfi_offset %esi, -12 ; X32-NEXT: .cfi_offset %ebx, -8 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movzbl (%eax), %ebx +; X32-NEXT: movb (%eax), %bl ; X32-NEXT: movzbl %bl, %esi ; X32-NEXT: movl %esi, (%esp) ; X32-NEXT: calll take_char @@ -346,7 +346,7 @@ define void @test_abi_exts_call(ptr %addr) { ; X64-NEXT: pushq %rbx ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 -; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: movb (%rdi), %al ; X64-NEXT: movzbl %al, %ebx ; X64-NEXT: movl %ebx, %edi ; X64-NEXT: callq take_char diff --git a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll index f92537ad170ffe..9ed66face45c95 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar-x32.ll @@ -8,7 +8,7 @@ define i1 @test_load_i1(ptr %p1) { ; CHECK-LABEL: test_load_i1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl 4(%esp), %eax -; CHECK-NEXT: movzbl (%eax), %eax +; CHECK-NEXT: movb (%eax), %al ; CHECK-NEXT: retl %r = load i1, ptr %p1 ret i1 %r @@ -18,7 +18,7 @@ define i8 @test_load_i8(ptr %p1) { ; CHECK-LABEL: test_load_i8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl 4(%esp), %eax -; CHECK-NEXT: movzbl (%eax), %eax +; CHECK-NEXT: movb (%eax), %al ; CHECK-NEXT: retl %r = load i8, ptr %p1 ret i8 %r diff --git a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll index 033672fb1fc213..38f619f4937baa 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/memop-scalar.ll @@ -5,7 +5,7 @@ define i1 @test_load_i1(ptr %p1) { ; ALL-LABEL: test_load_i1: ; ALL: # %bb.0: -; ALL-NEXT: movzbl (%rdi), %eax +; ALL-NEXT: movb (%rdi), %al ; ALL-NEXT: retq %r = load i1, ptr %p1 ret i1 %r @@ -14,7 +14,7 @@ define i1 @test_load_i1(ptr %p1) { define i8 @test_load_i8(ptr %p1) { ; ALL-LABEL: test_load_i8: ; ALL: # %bb.0: -; ALL-NEXT: movzbl (%rdi), %eax +; ALL-NEXT: movb (%rdi), %al ; ALL-NEXT: retq %r = load i8, ptr %p1 ret i8 %r diff --git a/llvm/test/CodeGen/X86/PR40322.ll b/llvm/test/CodeGen/X86/PR40322.ll index 49709cb9b88f88..298d827ea43a86 100644 --- a/llvm/test/CodeGen/X86/PR40322.ll +++ b/llvm/test/CodeGen/X86/PR40322.ll @@ -15,7 +15,7 @@ define void @_Z2ami(i32) #0 personality ptr @__gxx_personality_v0 { ; CHECK-MINGW-X86-NEXT: .cfi_def_cfa_offset 12 ; CHECK-MINGW-X86-NEXT: .cfi_offset %esi, -12 ; CHECK-MINGW-X86-NEXT: .cfi_offset %edi, -8 -; CHECK-MINGW-X86-NEXT: movzbl __ZGVZ2amiE2au, %eax +; CHECK-MINGW-X86-NEXT: movb __ZGVZ2amiE2au, %al ; CHECK-MINGW-X86-NEXT: testb %al, %al ; CHECK-MINGW-X86-NEXT: jne LBB0_4 ; CHECK-MINGW-X86-NEXT: # %bb.1: # %init.check diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll index 80a4eb6ccba14e..b8264835cc01e0 100644 --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -35,7 +35,7 @@ define i8 @test_i8(i8 %a) nounwind { ; ; X86-LABEL: test_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: sarb $7, %cl ; X86-NEXT: xorb %cl, %al @@ -530,13 +530,13 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind { ; X86-NEXT: xorb %al, %bh ; X86-NEXT: subb %al, %bh ; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarb $7, %al ; X86-NEXT: xorb %al, %cl ; X86-NEXT: subb %al, %cl ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarb $7, %al ; X86-NEXT: xorb %al, %cl @@ -572,7 +572,7 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind { ; X86-NEXT: sarb $7, %al ; X86-NEXT: xorb %al, %cl ; X86-NEXT: subb %al, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movb %al, %ah ; X86-NEXT: sarb $7, %ah ; X86-NEXT: xorb %ah, %al @@ -585,23 +585,23 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind { ; X86-NEXT: movb %dh, 11(%esi) ; X86-NEXT: movb %bl, 10(%esi) ; X86-NEXT: movb %bh, 9(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 8(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 7(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 6(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 5(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 4(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 3(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 2(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, 1(%esi) -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; X86-NEXT: movb %al, (%esi) ; X86-NEXT: movl %esi, %eax ; X86-NEXT: addl $12, %esp diff --git a/llvm/test/CodeGen/X86/add-sub-bool.ll b/llvm/test/CodeGen/X86/add-sub-bool.ll index 17eda596601934..248a9f19f1fbeb 100644 --- a/llvm/test/CodeGen/X86/add-sub-bool.ll +++ b/llvm/test/CodeGen/X86/add-sub-bool.ll @@ -390,7 +390,7 @@ define i64 @test_i64_add_add_var(i64 %x, i64 %y, i64 %z, i64 %w) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -529,7 +529,7 @@ define i32 @test_i32_sub_add_sext_var(i32 %x, i32 %y, i32 %z, i32 %w) nounwind { ; X86-LABEL: test_i32_sub_add_sext_var: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: sarl $31, %edx diff --git a/llvm/test/CodeGen/X86/and-load-fold.ll b/llvm/test/CodeGen/X86/and-load-fold.ll index b0819cd2887754..f01c8b0526fe49 100644 --- a/llvm/test/CodeGen/X86/and-load-fold.ll +++ b/llvm/test/CodeGen/X86/and-load-fold.ll @@ -6,7 +6,7 @@ define i8 @foo(ptr %V) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl 2(%rdi), %eax +; CHECK-NEXT: movb 2(%rdi), %al ; CHECK-NEXT: andb $95, %al ; CHECK-NEXT: retq %V3i8 = load <3 x i8>, ptr %V, align 4 diff --git a/llvm/test/CodeGen/X86/and-sink.ll b/llvm/test/CodeGen/X86/and-sink.ll index 002d1cdf271b9b..ccf35be439c34f 100644 --- a/llvm/test/CodeGen/X86/and-sink.ll +++ b/llvm/test/CodeGen/X86/and-sink.ll @@ -51,7 +51,7 @@ define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) { ; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp) ; CHECK-NEXT: je .LBB1_5 ; CHECK-NEXT: # %bb.1: # %bb0.preheader -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB1_2: # %bb0 diff --git a/llvm/test/CodeGen/X86/and-with-overflow.ll b/llvm/test/CodeGen/X86/and-with-overflow.ll index a63f6cc6ea7e26..aba73de1e77191 100644 --- a/llvm/test/CodeGen/X86/and-with-overflow.ll +++ b/llvm/test/CodeGen/X86/and-with-overflow.ll @@ -9,7 +9,7 @@ define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) { ; X86-LABEL: and_i8_ri: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andb $-17, %cl ; X86-NEXT: je .LBB0_2 @@ -35,8 +35,8 @@ define i8 @and_i8_ri(i8 zeroext %0, i8 zeroext %1) { define i8 @and_i8_rr(i8 zeroext %0, i8 zeroext %1) { ; X86-LABEL: and_i8_rr: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb %al, %cl ; X86-NEXT: je .LBB1_2 ; X86-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll index 9d57c9cb2b423e..1d3183e6e4dbcf 100644 --- a/llvm/test/CodeGen/X86/arg-copy-elide.ll +++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll @@ -74,7 +74,7 @@ define i1 @i1_arg(i1 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %eax -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl ; CHECK-NEXT: movl %ebx, %eax ; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) @@ -403,9 +403,9 @@ define i1 @use_i3(i3 %a1, i3 %a2) { ; CHECK-LABEL: use_i3: ; CHECK: # %bb.0: ; CHECK-NEXT: pushl %eax -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK-NEXT: andb $7, %al -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: andb $7, %cl ; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) ; CHECK-NEXT: cmpb %cl, %al diff --git a/llvm/test/CodeGen/X86/atom-cmpb.ll b/llvm/test/CodeGen/X86/atom-cmpb.ll index 46ac6e416738f2..b21a1fe63be7b8 100644 --- a/llvm/test/CodeGen/X86/atom-cmpb.ll +++ b/llvm/test/CodeGen/X86/atom-cmpb.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=i686-- -mcpu=atom | FileCheck %s ; CHECK: movl -; CHECK: movzbl -; CHECK: movzbl +; CHECK: movb +; CHECK: movb ; CHECK: cmpb ; CHECK: notb ; CHECK: notb diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll index 714f2912086cc8..19a3de4875516e 100644 --- a/llvm/test/CodeGen/X86/atomic-idempotent.ll +++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll @@ -15,14 +15,14 @@ define i8 @add8(ptr %p) { ; X64-LABEL: add8: ; X64: # %bb.0: ; X64-NEXT: mfence -; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: movb (%rdi), %al ; X64-NEXT: retq ; ; X86-SSE2-LABEL: add8: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: mfence -; X86-SSE2-NEXT: movzbl (%eax), %eax +; X86-SSE2-NEXT: movb (%eax), %al ; X86-SSE2-NEXT: retl ; ; X86-SLM-LABEL: add8: diff --git a/llvm/test/CodeGen/X86/atomic-mi.ll b/llvm/test/CodeGen/X86/atomic-mi.ll index 0d0108f55f2ab5..7077f4b12d9232 100644 --- a/llvm/test/CodeGen/X86/atomic-mi.ll +++ b/llvm/test/CodeGen/X86/atomic-mi.ll @@ -182,7 +182,7 @@ define void @add_8r(ptr %p, i8 %v) { ; ; X32-LABEL: add_8r: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addb %al, (%ecx) ; X32-NEXT: retl @@ -437,7 +437,7 @@ define void @sub_8r(ptr %p, i8 %v) { ; ; X32-LABEL: sub_8r: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: subb %al, (%ecx) ; X32-NEXT: retl @@ -616,7 +616,7 @@ define void @and_8r(ptr %p, i8 %v) { ; ; X32-LABEL: and_8r: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: andb %al, (%ecx) ; X32-NEXT: retl @@ -837,7 +837,7 @@ define void @or_8r(ptr %p, i8 %v) { ; ; X32-LABEL: or_8r: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: orb %al, (%ecx) ; X32-NEXT: retl @@ -1059,7 +1059,7 @@ define void @xor_8r(ptr %p, i8 %v) { ; ; X32-LABEL: xor_8r: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorb %al, (%ecx) ; X32-NEXT: retl diff --git a/llvm/test/CodeGen/X86/atomic-monotonic.ll b/llvm/test/CodeGen/X86/atomic-monotonic.ll index 963825ec48e40b..d82e7191803d58 100644 --- a/llvm/test/CodeGen/X86/atomic-monotonic.ll +++ b/llvm/test/CodeGen/X86/atomic-monotonic.ll @@ -3,15 +3,10 @@ ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefixes=CHECK,CHECK-O3 %s define i8 @load_i8(ptr %ptr) { -; CHECK-O0-LABEL: load_i8: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movb (%rdi), %al -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_i8: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movzbl (%rdi), %eax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: movb (%rdi), %al +; CHECK-NEXT: retq %v = load atomic i8, ptr %ptr monotonic, align 1 ret i8 %v } diff --git a/llvm/test/CodeGen/X86/atomic-unordered.ll b/llvm/test/CodeGen/X86/atomic-unordered.ll index 88ad8c42ff252e..0162a0e66ec3cb 100644 --- a/llvm/test/CodeGen/X86/atomic-unordered.ll +++ b/llvm/test/CodeGen/X86/atomic-unordered.ll @@ -5,15 +5,10 @@ ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s define i8 @load_i8(i8* %ptr) { -; CHECK-O0-LABEL: load_i8: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movb (%rdi), %al -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_i8: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movzbl (%rdi), %eax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: movb (%rdi), %al +; CHECK-NEXT: retq %v = load atomic i8, i8* %ptr unordered, align 1 ret i8 %v } diff --git a/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll b/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll index da9d16ca9cf726..b23c26800b2984 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll +++ b/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll @@ -30,9 +30,9 @@ define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-NEXT: movl %eax, 24(%rdi) ; CHECK-NEXT: movzwl -4(%rdi), %eax ; CHECK-NEXT: movw %ax, 28(%rdi) -; CHECK-NEXT: movzbl -2(%rdi), %eax +; CHECK-NEXT: movb -2(%rdi), %al ; CHECK-NEXT: movb %al, 30(%rdi) -; CHECK-NEXT: movzbl -1(%rdi), %eax +; CHECK-NEXT: movb -1(%rdi), %al ; CHECK-NEXT: movb %al, 31(%rdi) ; CHECK-NEXT: retq ; @@ -68,9 +68,9 @@ define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX2-NEXT: movl %eax, 24(%rdi) ; CHECK-AVX2-NEXT: movzwl -4(%rdi), %eax ; CHECK-AVX2-NEXT: movw %ax, 28(%rdi) -; CHECK-AVX2-NEXT: movzbl -2(%rdi), %eax +; CHECK-AVX2-NEXT: movb -2(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 30(%rdi) -; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax +; CHECK-AVX2-NEXT: movb -1(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 31(%rdi) ; CHECK-AVX2-NEXT: retq ; @@ -93,9 +93,9 @@ define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX512-NEXT: movl %eax, 24(%rdi) ; CHECK-AVX512-NEXT: movzwl -4(%rdi), %eax ; CHECK-AVX512-NEXT: movw %ax, 28(%rdi) -; CHECK-AVX512-NEXT: movzbl -2(%rdi), %eax +; CHECK-AVX512-NEXT: movb -2(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 30(%rdi) -; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax +; CHECK-AVX512-NEXT: movb -1(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 31(%rdi) ; CHECK-AVX512-NEXT: retq entry: @@ -223,9 +223,9 @@ define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-NEXT: movw %ax, 24(%rdi) ; CHECK-NEXT: movl -6(%rdi), %eax ; CHECK-NEXT: movl %eax, 26(%rdi) -; CHECK-NEXT: movzbl -2(%rdi), %eax +; CHECK-NEXT: movb -2(%rdi), %al ; CHECK-NEXT: movb %al, 30(%rdi) -; CHECK-NEXT: movzbl -1(%rdi), %eax +; CHECK-NEXT: movb -1(%rdi), %al ; CHECK-NEXT: movb %al, 31(%rdi) ; CHECK-NEXT: retq ; @@ -265,9 +265,9 @@ define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX2-NEXT: movw %ax, 24(%rdi) ; CHECK-AVX2-NEXT: movl -6(%rdi), %eax ; CHECK-AVX2-NEXT: movl %eax, 26(%rdi) -; CHECK-AVX2-NEXT: movzbl -2(%rdi), %eax +; CHECK-AVX2-NEXT: movb -2(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 30(%rdi) -; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax +; CHECK-AVX2-NEXT: movb -1(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 31(%rdi) ; CHECK-AVX2-NEXT: retq ; @@ -294,9 +294,9 @@ define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX512-NEXT: movw %ax, 24(%rdi) ; CHECK-AVX512-NEXT: movl -6(%rdi), %eax ; CHECK-AVX512-NEXT: movl %eax, 26(%rdi) -; CHECK-AVX512-NEXT: movzbl -2(%rdi), %eax +; CHECK-AVX512-NEXT: movb -2(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 30(%rdi) -; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax +; CHECK-AVX512-NEXT: movb -1(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 31(%rdi) ; CHECK-AVX512-NEXT: retq entry: @@ -327,7 +327,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-NEXT: movl $0, -11(%rdi) ; CHECK-NEXT: movl -16(%rdi), %eax ; CHECK-NEXT: movl %eax, 16(%rdi) -; CHECK-NEXT: movzbl -12(%rdi), %eax +; CHECK-NEXT: movb -12(%rdi), %al ; CHECK-NEXT: movb %al, 20(%rdi) ; CHECK-NEXT: movl -11(%rdi), %eax ; CHECK-NEXT: movl %eax, 21(%rdi) @@ -335,7 +335,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-NEXT: movl %eax, 25(%rdi) ; CHECK-NEXT: movzwl -3(%rdi), %eax ; CHECK-NEXT: movw %ax, 29(%rdi) -; CHECK-NEXT: movzbl -1(%rdi), %eax +; CHECK-NEXT: movb -1(%rdi), %al ; CHECK-NEXT: movb %al, 31(%rdi) ; CHECK-NEXT: retq ; @@ -361,7 +361,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX2-NEXT: movl $0, -11(%rdi) ; CHECK-AVX2-NEXT: movl -16(%rdi), %eax ; CHECK-AVX2-NEXT: movl %eax, 16(%rdi) -; CHECK-AVX2-NEXT: movzbl -12(%rdi), %eax +; CHECK-AVX2-NEXT: movb -12(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 20(%rdi) ; CHECK-AVX2-NEXT: movl -11(%rdi), %eax ; CHECK-AVX2-NEXT: movl %eax, 21(%rdi) @@ -369,7 +369,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX2-NEXT: movl %eax, 25(%rdi) ; CHECK-AVX2-NEXT: movzwl -3(%rdi), %eax ; CHECK-AVX2-NEXT: movw %ax, 29(%rdi) -; CHECK-AVX2-NEXT: movzbl -1(%rdi), %eax +; CHECK-AVX2-NEXT: movb -1(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 31(%rdi) ; CHECK-AVX2-NEXT: retq ; @@ -383,7 +383,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX512-NEXT: movl $0, -11(%rdi) ; CHECK-AVX512-NEXT: movl -16(%rdi), %eax ; CHECK-AVX512-NEXT: movl %eax, 16(%rdi) -; CHECK-AVX512-NEXT: movzbl -12(%rdi), %eax +; CHECK-AVX512-NEXT: movb -12(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 20(%rdi) ; CHECK-AVX512-NEXT: movl -11(%rdi), %eax ; CHECK-AVX512-NEXT: movl %eax, 21(%rdi) @@ -391,7 +391,7 @@ define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX512-NEXT: movl %eax, 25(%rdi) ; CHECK-AVX512-NEXT: movzwl -3(%rdi), %eax ; CHECK-AVX512-NEXT: movw %ax, 29(%rdi) -; CHECK-AVX512-NEXT: movzbl -1(%rdi), %eax +; CHECK-AVX512-NEXT: movb -1(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 31(%rdi) ; CHECK-AVX512-NEXT: retq entry: @@ -420,11 +420,11 @@ define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-NEXT: movb $0, -11(%rdi) ; CHECK-NEXT: movzwl -16(%rdi), %eax ; CHECK-NEXT: movw %ax, 16(%rdi) -; CHECK-NEXT: movzbl -14(%rdi), %eax +; CHECK-NEXT: movb -14(%rdi), %al ; CHECK-NEXT: movb %al, 18(%rdi) ; CHECK-NEXT: movzwl -13(%rdi), %eax ; CHECK-NEXT: movw %ax, 19(%rdi) -; CHECK-NEXT: movzbl -11(%rdi), %eax +; CHECK-NEXT: movb -11(%rdi), %al ; CHECK-NEXT: movb %al, 21(%rdi) ; CHECK-NEXT: movq -10(%rdi), %rax ; CHECK-NEXT: movq %rax, 22(%rdi) @@ -454,11 +454,11 @@ define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX2-NEXT: movb $0, -11(%rdi) ; CHECK-AVX2-NEXT: movzwl -16(%rdi), %eax ; CHECK-AVX2-NEXT: movw %ax, 16(%rdi) -; CHECK-AVX2-NEXT: movzbl -14(%rdi), %eax +; CHECK-AVX2-NEXT: movb -14(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 18(%rdi) ; CHECK-AVX2-NEXT: movzwl -13(%rdi), %eax ; CHECK-AVX2-NEXT: movw %ax, 19(%rdi) -; CHECK-AVX2-NEXT: movzbl -11(%rdi), %eax +; CHECK-AVX2-NEXT: movb -11(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 21(%rdi) ; CHECK-AVX2-NEXT: movq -10(%rdi), %rax ; CHECK-AVX2-NEXT: movq %rax, 22(%rdi) @@ -476,11 +476,11 @@ define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_ad ; CHECK-AVX512-NEXT: movb $0, -11(%rdi) ; CHECK-AVX512-NEXT: movzwl -16(%rdi), %eax ; CHECK-AVX512-NEXT: movw %ax, 16(%rdi) -; CHECK-AVX512-NEXT: movzbl -14(%rdi), %eax +; CHECK-AVX512-NEXT: movb -14(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 18(%rdi) ; CHECK-AVX512-NEXT: movzwl -13(%rdi), %eax ; CHECK-AVX512-NEXT: movw %ax, 19(%rdi) -; CHECK-AVX512-NEXT: movzbl -11(%rdi), %eax +; CHECK-AVX512-NEXT: movb -11(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 21(%rdi) ; CHECK-AVX512-NEXT: movq -10(%rdi), %rax ; CHECK-AVX512-NEXT: movq %rax, 22(%rdi) diff --git a/llvm/test/CodeGen/X86/avoid-sfb.ll b/llvm/test/CodeGen/X86/avoid-sfb.ll index 9929c8839797df..e43fdfca46813d 100644 --- a/llvm/test/CodeGen/X86/avoid-sfb.ll +++ b/llvm/test/CodeGen/X86/avoid-sfb.ll @@ -436,13 +436,13 @@ define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 % ; CHECK-NEXT: .LBB5_2: # %if.end ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: movq %rax, (%rsi) -; CHECK-NEXT: movzbl 8(%rdi), %eax +; CHECK-NEXT: movb 8(%rdi), %al ; CHECK-NEXT: movb %al, 8(%rsi) ; CHECK-NEXT: movl 9(%rdi), %eax ; CHECK-NEXT: movl %eax, 9(%rsi) ; CHECK-NEXT: movzwl 13(%rdi), %eax ; CHECK-NEXT: movw %ax, 13(%rsi) -; CHECK-NEXT: movzbl 15(%rdi), %eax +; CHECK-NEXT: movb 15(%rdi), %al ; CHECK-NEXT: movb %al, 15(%rsi) ; CHECK-NEXT: retq ; @@ -470,13 +470,13 @@ define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 % ; CHECK-AVX2-NEXT: .LBB5_2: # %if.end ; CHECK-AVX2-NEXT: movq (%rdi), %rax ; CHECK-AVX2-NEXT: movq %rax, (%rsi) -; CHECK-AVX2-NEXT: movzbl 8(%rdi), %eax +; CHECK-AVX2-NEXT: movb 8(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 8(%rsi) ; CHECK-AVX2-NEXT: movl 9(%rdi), %eax ; CHECK-AVX2-NEXT: movl %eax, 9(%rsi) ; CHECK-AVX2-NEXT: movzwl 13(%rdi), %eax ; CHECK-AVX2-NEXT: movw %ax, 13(%rsi) -; CHECK-AVX2-NEXT: movzbl 15(%rdi), %eax +; CHECK-AVX2-NEXT: movb 15(%rdi), %al ; CHECK-AVX2-NEXT: movb %al, 15(%rsi) ; CHECK-AVX2-NEXT: retq ; @@ -491,13 +491,13 @@ define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 % ; CHECK-AVX512-NEXT: .LBB5_2: # %if.end ; CHECK-AVX512-NEXT: movq (%rdi), %rax ; CHECK-AVX512-NEXT: movq %rax, (%rsi) -; CHECK-AVX512-NEXT: movzbl 8(%rdi), %eax +; CHECK-AVX512-NEXT: movb 8(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 8(%rsi) ; CHECK-AVX512-NEXT: movl 9(%rdi), %eax ; CHECK-AVX512-NEXT: movl %eax, 9(%rsi) ; CHECK-AVX512-NEXT: movzwl 13(%rdi), %eax ; CHECK-AVX512-NEXT: movw %ax, 13(%rsi) -; CHECK-AVX512-NEXT: movzbl 15(%rdi), %eax +; CHECK-AVX512-NEXT: movb 15(%rdi), %al ; CHECK-AVX512-NEXT: movb %al, 15(%rsi) ; CHECK-AVX512-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll index 7a5baf15fe8457..055726f0c324eb 100644 --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -687,10 +687,10 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp) ; KNL-NEXT: cmovnel %eax, %r10d ; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: andl $1, %edi ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $14, %k1, %k1 @@ -699,7 +699,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 @@ -708,7 +708,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $12, %k1, %k1 @@ -717,7 +717,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $11, %k1, %k1 @@ -726,7 +726,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $10, %k1, %k1 @@ -735,7 +735,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $9, %k1, %k1 @@ -744,7 +744,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $8, %k1, %k1 @@ -753,7 +753,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $7, %k1, %k1 @@ -761,7 +761,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: movw $-513, %di ## imm = 0xFDFF ; KNL-NEXT: kmovw %edi, %k7 ; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $6, %k1, %k1 @@ -769,7 +769,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: movw $-1025, %di ## imm = 0xFBFF ; KNL-NEXT: kmovw %edi, %k4 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $5, %k1, %k1 @@ -777,7 +777,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: movw $-2049, %di ## imm = 0xF7FF ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $4, %k1, %k1 @@ -785,7 +785,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: movw $-4097, %di ## imm = 0xEFFF ; KNL-NEXT: kmovw %edi, %k2 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $3, %k1, %k1 @@ -793,7 +793,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: movw $-8193, %di ## imm = 0xDFFF ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $2, %k5, %k5 @@ -801,13 +801,13 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: movw $-16385, %di ## imm = 0xBFFF ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kandw %k0, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kshiftlw $1, %k5, %k5 ; KNL-NEXT: kshiftrw $1, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 @@ -838,70 +838,70 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $8, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $7, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kandw %k7, %k5, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $6, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kandw %k4, %k5, %k4 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $5, %k5, %k5 ; KNL-NEXT: korw %k5, %k4, %k4 ; KNL-NEXT: kandw %k3, %k4, %k3 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $4, %k4, %k4 ; KNL-NEXT: korw %k4, %k3, %k3 ; KNL-NEXT: kandw %k2, %k3, %k2 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 ; KNL-NEXT: kshiftrw $3, %k3, %k3 ; KNL-NEXT: korw %k3, %k2, %k2 ; KNL-NEXT: kandw %k1, %k2, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $2, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kandw %k0, %k1, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftlw $14, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: korw %k1, %k0, %k0 @@ -1323,10 +1323,10 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: cmovnel %edx, %ecx ; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: cmovnel %edx, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: andl $1, %edx ; KNL_X32-NEXT: kmovw %edx, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 @@ -1335,7 +1335,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $13, %k1, %k1 @@ -1344,7 +1344,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $12, %k1, %k1 @@ -1353,7 +1353,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $11, %k1, %k1 @@ -1362,7 +1362,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $10, %k1, %k1 @@ -1371,7 +1371,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $9, %k1, %k1 @@ -1380,7 +1380,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $8, %k1, %k1 @@ -1389,7 +1389,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $7, %k1, %k1 @@ -1397,7 +1397,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: movw $-513, %dx ## imm = 0xFDFF ; KNL_X32-NEXT: kmovw %edx, %k7 ; KNL_X32-NEXT: kandw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $6, %k1, %k1 @@ -1405,7 +1405,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: movw $-1025, %dx ## imm = 0xFBFF ; KNL_X32-NEXT: kmovw %edx, %k4 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $5, %k1, %k1 @@ -1413,7 +1413,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: movw $-2049, %dx ## imm = 0xF7FF ; KNL_X32-NEXT: kmovw %edx, %k3 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $4, %k1, %k1 @@ -1421,7 +1421,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: movw $-4097, %dx ## imm = 0xEFFF ; KNL_X32-NEXT: kmovw %edx, %k2 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $3, %k1, %k1 @@ -1429,7 +1429,7 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: movw $-8193, %dx ## imm = 0xDFFF ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k5 ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 ; KNL_X32-NEXT: kshiftrw $2, %k5, %k5 @@ -1437,20 +1437,20 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: movw $-16385, %dx ## imm = 0xBFFF ; KNL_X32-NEXT: kmovw %edx, %k0 ; KNL_X32-NEXT: kandw %k0, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $14, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kshiftlw $1, %k5, %k5 ; KNL_X32-NEXT: kshiftrw $1, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kmovw %k5, (%esp) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: andl $1, %edx -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl ; KNL_X32-NEXT: kmovw %ebx, %k5 ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 ; KNL_X32-NEXT: kshiftrw $14, %k5, %k5 @@ -1458,91 +1458,91 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind { ; KNL_X32-NEXT: korw %k5, %k6, %k5 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $8, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload ; KNL_X32-NEXT: kandw %k6, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $7, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kandw %k7, %k5, %k5 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $6, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k5, %k5 ; KNL_X32-NEXT: kandw %k4, %k5, %k4 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k5 ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 ; KNL_X32-NEXT: kshiftrw $5, %k5, %k5 ; KNL_X32-NEXT: korw %k5, %k4, %k4 ; KNL_X32-NEXT: kandw %k3, %k4, %k3 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k4 ; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 ; KNL_X32-NEXT: kshiftrw $4, %k4, %k4 ; KNL_X32-NEXT: korw %k4, %k3, %k3 ; KNL_X32-NEXT: kandw %k2, %k3, %k2 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k3 ; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 ; KNL_X32-NEXT: kshiftrw $3, %k3, %k3 ; KNL_X32-NEXT: korw %k3, %k2, %k2 ; KNL_X32-NEXT: kandw %k1, %k2, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $2, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 ; KNL_X32-NEXT: kandw %k0, %k1, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $14, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 ; KNL_X32-NEXT: kshiftlw $1, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; KNL_X32-NEXT: kmovw %edx, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: korw %k1, %k0, %k0 @@ -1957,10 +1957,10 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL-LABEL: test17: ; KNL: ## %bb.0: ; KNL-NEXT: movq %rdi, %rax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: andl $1, %edi ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $14, %k1, %k1 @@ -1968,7 +1968,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL-NEXT: movw $-5, %di ; KNL-NEXT: kmovw %edi, %k1 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $13, %k2, %k2 @@ -1976,7 +1976,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL-NEXT: movw $-9, %di ; KNL-NEXT: kmovw %edi, %k2 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 ; KNL-NEXT: kshiftrw $12, %k3, %k3 @@ -1984,7 +1984,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL-NEXT: movw $-17, %di ; KNL-NEXT: kmovw %edi, %k3 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $11, %k4, %k4 @@ -1992,7 +1992,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL-NEXT: movw $-33, %di ; KNL-NEXT: kmovw %edi, %k4 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $10, %k5, %k5 @@ -2000,241 +2000,241 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL-NEXT: movw $-65, %di ; KNL-NEXT: kmovw %edi, %k5 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: andl $1, %edi ; KNL-NEXT: kmovw %edi, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; KNL-NEXT: andl $1, %r10d -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %r10d, %k6 ; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; KNL-NEXT: andl $1, %r10d -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %r10d, %k6 ; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; KNL-NEXT: andl $1, %r10d -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %r10d, %k6 ; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; KNL-NEXT: andl $1, %r10d -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %r10d, %k6 ; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; KNL-NEXT: andl $1, %r10d -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %r10d, %k6 ; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil ; KNL-NEXT: kmovw %edi, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 @@ -2262,51 +2262,51 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %edx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl ; KNL-NEXT: kmovw %edx, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $14, %k7, %k7 ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k1, %k6, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $12, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $11, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kandw %k4, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $10, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kandw %k5, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: kshiftrw $9, %k2, %k2 @@ -2743,10 +2743,10 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL_X32: ## %bb.0: ; KNL_X32-NEXT: pushl %ebx ; KNL_X32-NEXT: subl $16, %esp -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k1 ; KNL_X32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_X32-NEXT: kshiftrw $14, %k1, %k1 @@ -2754,7 +2754,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL_X32-NEXT: movw $-5, %ax ; KNL_X32-NEXT: kmovw %eax, %k1 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $13, %k2, %k2 @@ -2762,7 +2762,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL_X32-NEXT: movw $-9, %ax ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k3 ; KNL_X32-NEXT: kshiftlw $15, %k3, %k3 ; KNL_X32-NEXT: kshiftrw $12, %k3, %k3 @@ -2770,7 +2770,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL_X32-NEXT: movw $-17, %ax ; KNL_X32-NEXT: kmovw %eax, %k3 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k4 ; KNL_X32-NEXT: kshiftlw $15, %k4, %k4 ; KNL_X32-NEXT: kshiftrw $11, %k4, %k4 @@ -2778,7 +2778,7 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL_X32-NEXT: movw $-33, %ax ; KNL_X32-NEXT: kmovw %eax, %k4 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k5 ; KNL_X32-NEXT: kshiftlw $15, %k5, %k5 ; KNL_X32-NEXT: kshiftrw $10, %k5, %k5 @@ -2786,318 +2786,318 @@ define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x ; KNL_X32-NEXT: movw $-65, %ax ; KNL_X32-NEXT: kmovw %eax, %k5 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax ; KNL_X32-NEXT: kmovw %eax, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $14, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; KNL_X32-NEXT: kmovw %ecx, %k0 ; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; KNL_X32-NEXT: kmovw %ecx, %k0 ; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; KNL_X32-NEXT: kmovw %ecx, %k0 ; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; KNL_X32-NEXT: kmovw %ecx, %k0 ; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; KNL_X32-NEXT: kmovw %ecx, %k0 ; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: korw %k0, %k6, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $12, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $11, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $10, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $9, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k0, %k0 ; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; KNL_X32-NEXT: kmovw %ecx, %k0 ; KNL_X32-NEXT: kshiftlw $15, %k0, %k0 ; KNL_X32-NEXT: kshiftrw $14, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: korw %k0, %k7, %k0 ; KNL_X32-NEXT: kandw %k1, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $13, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: kandw %k2, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $12, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: kandw %k3, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $11, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: kandw %k4, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $10, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 ; KNL_X32-NEXT: kandw %k5, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $9, %k7, %k7 ; KNL_X32-NEXT: korw %k7, %k0, %k0 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: andl $1, %eax -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; KNL_X32-NEXT: kmovw %ecx, %k7 ; KNL_X32-NEXT: kshiftlw $15, %k7, %k7 ; KNL_X32-NEXT: kshiftrw $14, %k7, %k7 ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: korw %k7, %k6, %k6 ; KNL_X32-NEXT: kandw %k1, %k6, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k6 ; KNL_X32-NEXT: kshiftlw $15, %k6, %k6 ; KNL_X32-NEXT: kshiftrw $13, %k6, %k6 ; KNL_X32-NEXT: korw %k6, %k1, %k1 ; KNL_X32-NEXT: kandw %k2, %k1, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $12, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 ; KNL_X32-NEXT: kandw %k3, %k1, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $11, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 ; KNL_X32-NEXT: kandw %k4, %k1, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $10, %k2, %k2 ; KNL_X32-NEXT: korw %k2, %k1, %k1 ; KNL_X32-NEXT: kandw %k5, %k1, %k1 -; KNL_X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kshiftlw $15, %k2, %k2 ; KNL_X32-NEXT: kshiftrw $9, %k2, %k2 diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index 6fb8ba9f0ea27d..1bb3d2747fb49c 100644 --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -1886,10 +1886,10 @@ define void @extload_v8i64(ptr %a, ptr %res) { define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-LABEL: test21: ; KNL: # %bb.0: -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: andl $1, %eax ; KNL-NEXT: kmovw %eax, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $14, %k1, %k1 @@ -1899,7 +1899,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k7 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $13, %k1, %k1 @@ -1908,7 +1908,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $12, %k1, %k1 @@ -1917,7 +1917,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $11, %k1, %k1 @@ -1927,7 +1927,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k2 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $10, %k1, %k1 @@ -1936,7 +1936,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $9, %k1, %k1 @@ -1946,7 +1946,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k3 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $8, %k1, %k1 @@ -1955,7 +1955,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $7, %k1, %k1 @@ -1965,7 +1965,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k4 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $6, %k1, %k1 @@ -1974,7 +1974,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $5, %k1, %k1 @@ -1982,7 +1982,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: movw $-2049, %ax # imm = 0xF7FF ; KNL-NEXT: kmovw %eax, %k5 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $4, %k1, %k1 @@ -1991,7 +1991,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $3, %k1, %k1 @@ -2000,7 +2000,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $2, %k1, %k1 @@ -2009,13 +2009,13 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: kmovw %eax, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 @@ -2050,74 +2050,74 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $8, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $7, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k4, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $6, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload ; KNL-NEXT: kandw %k4, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; KNL-NEXT: kandw %k5, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $4, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $3, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $2, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $14, %k1, %k1 @@ -2125,97 +2125,97 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: korw %k1, %k6, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; KNL-NEXT: kandw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k7, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $8, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $7, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; KNL-NEXT: kandw %k3, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $6, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k4, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kandw %k5, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $4, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; KNL-NEXT: kandw %k0, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $3, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; KNL-NEXT: kandw %k2, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $2, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: korw %k6, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $14, %k6, %k6 @@ -2223,93 +2223,93 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; KNL-NEXT: korw %k6, %k7, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $13, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $8, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $7, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k3, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $6, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k4, %k6, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; KNL-NEXT: kandw %k3, %k5, %k4 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $4, %k5, %k5 ; KNL-NEXT: korw %k5, %k4, %k4 ; KNL-NEXT: kandw %k0, %k4, %k3 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $3, %k4, %k4 ; KNL-NEXT: korw %k4, %k3, %k3 ; KNL-NEXT: kandw %k2, %k3, %k2 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 ; KNL-NEXT: kshiftrw $2, %k3, %k3 ; KNL-NEXT: korw %k3, %k2, %k2 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 2-byte Reload ; KNL-NEXT: kandw %k0, %k2, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kshiftlw $14, %k2, %k2 ; KNL-NEXT: korw %k2, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: korw %k2, %k0, %k2 @@ -2340,10 +2340,10 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; ; AVX512DQNOBW-LABEL: test21: ; AVX512DQNOBW: # %bb.0: -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: andl $1, %eax ; AVX512DQNOBW-NEXT: kmovw %eax, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k1, %k1 @@ -2353,7 +2353,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k1, %k7 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k1, %k1 @@ -2362,7 +2362,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k1, %k1 @@ -2371,7 +2371,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k1, %k1 @@ -2381,7 +2381,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k1, %k2 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k1, %k1 @@ -2390,7 +2390,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k1, %k1 @@ -2400,7 +2400,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k1, %k3 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k1, %k1 @@ -2409,7 +2409,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k1, %k1 @@ -2419,7 +2419,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k1, %k4 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k1, %k1 @@ -2428,7 +2428,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k1, %k1 @@ -2436,7 +2436,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: movw $-2049, %ax # imm = 0xF7FF ; AVX512DQNOBW-NEXT: kmovw %eax, %k5 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k1, %k1 @@ -2445,7 +2445,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k1, %k1 @@ -2454,7 +2454,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k1, %k1 @@ -2463,13 +2463,13 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: kmovw %eax, %k1 ; AVX512DQNOBW-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 @@ -2504,74 +2504,74 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) # 2-byte Spill -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: andl $1, %eax -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %cl ; AVX512DQNOBW-NEXT: kmovw %ecx, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k0, %k0 @@ -2579,97 +2579,97 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: korw %k0, %k6, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k7, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k3, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k4, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k2, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k0, %k0 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: andl $1, %eax -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %cl ; AVX512DQNOBW-NEXT: kmovw %ecx, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $14, %k6, %k6 @@ -2677,93 +2677,93 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; AVX512DQNOBW-NEXT: korw %k6, %k7, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $13, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $12, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $11, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $10, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $9, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $8, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k5, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $7, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kandw %k3, %k6, %k6 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k7 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQNOBW-NEXT: kshiftrw $6, %k7, %k7 ; AVX512DQNOBW-NEXT: korw %k7, %k6, %k6 ; AVX512DQNOBW-NEXT: kandw %k4, %k6, %k5 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k6 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQNOBW-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQNOBW-NEXT: korw %k6, %k5, %k5 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k3, %k5, %k4 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k5 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k5, %k5 ; AVX512DQNOBW-NEXT: kshiftrw $4, %k5, %k5 ; AVX512DQNOBW-NEXT: korw %k5, %k4, %k4 ; AVX512DQNOBW-NEXT: kandw %k1, %k4, %k3 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k4 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k4, %k4 ; AVX512DQNOBW-NEXT: kshiftrw $3, %k4, %k4 ; AVX512DQNOBW-NEXT: korw %k4, %k3, %k3 ; AVX512DQNOBW-NEXT: kandw %k2, %k3, %k2 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k3 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k3, %k3 ; AVX512DQNOBW-NEXT: kshiftrw $2, %k3, %k3 ; AVX512DQNOBW-NEXT: korw %k3, %k2, %k2 ; AVX512DQNOBW-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 # 2-byte Reload ; AVX512DQNOBW-NEXT: kandw %k1, %k2, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 ; AVX512DQNOBW-NEXT: kshiftlw $14, %k2, %k2 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1 ; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQNOBW-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQNOBW-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQNOBW-NEXT: kmovw %eax, %k2 ; AVX512DQNOBW-NEXT: kshiftlw $15, %k2, %k2 ; AVX512DQNOBW-NEXT: korw %k2, %k1, %k1 diff --git a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll index e43b2f4b4abc46..cc34b3841d3f88 100644 --- a/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll +++ b/llvm/test/CodeGen/X86/avx512-extract-subvector-load-store.ll @@ -696,7 +696,7 @@ define void @load_v2i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) { define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) { ; AVX512-LABEL: load_v3i1_broadcast_1_v1i1_store: ; AVX512: # %bb.0: -; AVX512-NEXT: movzbl (%rdi), %eax +; AVX512-NEXT: movb (%rdi), %al ; AVX512-NEXT: shrb %al ; AVX512-NEXT: xorl %ecx, %ecx ; AVX512-NEXT: testb $1, %al @@ -711,7 +711,7 @@ define void @load_v3i1_broadcast_1_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512NOTDQ-LABEL: load_v3i1_broadcast_1_v1i1_store: ; AVX512NOTDQ: # %bb.0: -; AVX512NOTDQ-NEXT: movzbl (%rdi), %eax +; AVX512NOTDQ-NEXT: movb (%rdi), %al ; AVX512NOTDQ-NEXT: shrb %al ; AVX512NOTDQ-NEXT: xorl %ecx, %ecx ; AVX512NOTDQ-NEXT: testb $1, %al diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 51e704ba303be9..c40ce9ad2d95a2 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -308,7 +308,7 @@ define i16 @test15(ptr%addr) { define i16 @test16(ptr%addr, i16 %a) { ; KNL-LABEL: test16: ; KNL: ## %bb.0: -; KNL-NEXT: movzbl (%rdi), %eax +; KNL-NEXT: movb (%rdi), %al ; KNL-NEXT: kmovw %esi, %k0 ; KNL-NEXT: movw $-1025, %cx ## imm = 0xFBFF ; KNL-NEXT: kmovw %ecx, %k1 @@ -344,7 +344,7 @@ define i16 @test16(ptr%addr, i16 %a) { define i8 @test17(ptr%addr, i8 %a) { ; KNL-LABEL: test17: ; KNL: ## %bb.0: -; KNL-NEXT: movzbl (%rdi), %eax +; KNL-NEXT: movb (%rdi), %al ; KNL-NEXT: kmovw %esi, %k0 ; KNL-NEXT: movw $-17, %cx ; KNL-NEXT: kmovw %ecx, %k1 @@ -1429,7 +1429,7 @@ define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) { ; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi ; CHECK-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: andl $15, %edi -; CHECK-NEXT: movzbl -24(%rsp,%rdi), %eax +; CHECK-NEXT: movb -24(%rsp,%rdi), %al ; CHECK-NEXT: retq %t2 = extractelement <16 x i8> %t1, i32 %index ret i8 %t2 @@ -1448,7 +1448,7 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) { ; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi ; CHECK-NEXT: vmovaps %ymm0, (%rsp) ; CHECK-NEXT: andl $31, %edi -; CHECK-NEXT: movzbl (%rsp,%rdi), %eax +; CHECK-NEXT: movb (%rsp,%rdi), %al ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: vzeroupper @@ -1471,7 +1471,7 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) { ; CHECK-NEXT: ## kill: def $edi killed $edi def $rdi ; CHECK-NEXT: vmovaps %zmm0, (%rsp) ; CHECK-NEXT: andl $63, %edi -; CHECK-NEXT: movzbl (%rsp,%rdi), %eax +; CHECK-NEXT: movb (%rsp,%rdi), %al ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: vzeroupper @@ -1495,7 +1495,7 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index) ; CHECK-NEXT: vmovaps %zmm0, (%rsp) ; CHECK-NEXT: movzbl %dil, %eax ; CHECK-NEXT: andl $63, %eax -; CHECK-NEXT: movzbl (%rsp,%rax), %eax +; CHECK-NEXT: movb (%rsp,%rax), %al ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll index a84c32aeead4ac..3e9225315228fb 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-canonical.ll @@ -1911,7 +1911,7 @@ entry: define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmadd_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 @@ -1939,7 +1939,7 @@ entry: define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmadd_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 @@ -1961,7 +1961,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 @@ -1988,7 +1988,7 @@ entry: define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 @@ -2010,7 +2010,7 @@ declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, < define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 @@ -2040,7 +2040,7 @@ entry: define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 @@ -2064,7 +2064,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, < define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmsub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 @@ -2093,7 +2093,7 @@ entry: define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmsub_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 @@ -2114,7 +2114,7 @@ entry: define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 @@ -2142,7 +2142,7 @@ entry: define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 @@ -2163,7 +2163,7 @@ entry: define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 @@ -2194,7 +2194,7 @@ entry: define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 @@ -2218,7 +2218,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, < define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 @@ -2247,7 +2247,7 @@ entry: define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 @@ -2268,7 +2268,7 @@ entry: define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 @@ -2296,7 +2296,7 @@ entry: define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 @@ -2317,7 +2317,7 @@ entry: define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 @@ -2348,7 +2348,7 @@ entry: define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 @@ -2371,7 +2371,7 @@ entry: define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 @@ -2401,7 +2401,7 @@ entry: define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 @@ -2423,7 +2423,7 @@ entry: define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 @@ -2452,7 +2452,7 @@ entry: define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 @@ -2474,7 +2474,7 @@ entry: define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 @@ -2506,7 +2506,7 @@ entry: define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_round_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 @@ -2529,7 +2529,7 @@ entry: define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmadd_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 @@ -2557,7 +2557,7 @@ entry: define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmadd_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) + xmm2 @@ -2579,7 +2579,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 @@ -2606,7 +2606,7 @@ entry: define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 @@ -2628,7 +2628,7 @@ declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double> define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 @@ -2658,7 +2658,7 @@ entry: define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) + xmm2 @@ -2682,7 +2682,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double> define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmsub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 @@ -2711,7 +2711,7 @@ entry: define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmsub_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} = (xmm1 * xmm0) - xmm2 @@ -2732,7 +2732,7 @@ entry: define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 @@ -2760,7 +2760,7 @@ entry: define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 @@ -2781,7 +2781,7 @@ entry: define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 @@ -2812,7 +2812,7 @@ entry: define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1] ; X86-NEXT: ## xmm2 {%k1} = (xmm0 * xmm1) - xmm2 @@ -2836,7 +2836,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double> define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 @@ -2865,7 +2865,7 @@ entry: define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 @@ -2886,7 +2886,7 @@ entry: define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 @@ -2914,7 +2914,7 @@ entry: define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 @@ -2935,7 +2935,7 @@ entry: define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 @@ -2966,7 +2966,7 @@ entry: define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 @@ -2989,7 +2989,7 @@ entry: define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 @@ -3019,7 +3019,7 @@ entry: define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 @@ -3041,7 +3041,7 @@ entry: define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 @@ -3070,7 +3070,7 @@ entry: define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 @@ -3092,7 +3092,7 @@ entry: define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 @@ -3124,7 +3124,7 @@ entry: define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_round_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1] ; X86-NEXT: ## xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 @@ -3147,7 +3147,7 @@ entry: define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_add_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vaddss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x58,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3172,7 +3172,7 @@ entry: define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_add_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x58,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3196,7 +3196,7 @@ entry: define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_add_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vaddsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x58,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3221,7 +3221,7 @@ entry: define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_add_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x58,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3245,7 +3245,7 @@ entry: define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_sub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vsubss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5c,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3270,7 +3270,7 @@ entry: define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_sub_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5c,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3294,7 +3294,7 @@ entry: define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_sub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vsubsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5c,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3319,7 +3319,7 @@ entry: define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_sub_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5c,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3343,7 +3343,7 @@ entry: define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_mul_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmulss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x59,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3368,7 +3368,7 @@ entry: define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_mul_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x59,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3392,7 +3392,7 @@ entry: define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_mul_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmulsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x59,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3417,7 +3417,7 @@ entry: define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_mul_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x59,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3441,7 +3441,7 @@ entry: define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_div_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vdivss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5e,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3466,7 +3466,7 @@ entry: define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_div_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5e,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3490,7 +3490,7 @@ entry: define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_div_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vdivsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5e,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3515,7 +3515,7 @@ entry: define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_div_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5e,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index fc8f812bc14881..094aca69bfed1c 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -219,7 +219,7 @@ entry: define <8 x double> @test_mm512_mask_shuffle_f64x2(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_mask_shuffle_f64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1] ; X86-NEXT: retl @@ -239,7 +239,7 @@ entry: define <8 x double> @test_mm512_maskz_shuffle_f64x2(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_maskz_shuffle_f64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1] ; X86-NEXT: retl @@ -324,7 +324,7 @@ entry: define <8 x i64> @test_mm512_mask_shuffle_i64x2(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; X86-LABEL: test_mm512_mask_shuffle_i64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1] ; X86-NEXT: retl @@ -344,7 +344,7 @@ entry: define <8 x i64> @test_mm512_maskz_shuffle_i64x2(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 { ; X86-LABEL: test_mm512_maskz_shuffle_i64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1] ; X86-NEXT: retl @@ -425,7 +425,7 @@ entry: define zeroext i8 @test_mm512_mask_testn_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_testn_epi64_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -482,7 +482,7 @@ entry: define zeroext i8 @test_mm512_mask_test_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_test_epi64_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -557,7 +557,7 @@ entry: define <8 x i64> @test_mm512_mask_set1_epi64(<8 x i64> %__O, i8 zeroext %__M, i64 %__A) { ; X86-LABEL: test_mm512_mask_set1_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 ; X86-NEXT: kmovw %eax, %k1 @@ -580,7 +580,7 @@ entry: define <8 x i64> @test_mm512_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) { ; X86-LABEL: test_mm512_maskz_set1_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-NEXT: kmovw %eax, %k1 @@ -667,7 +667,7 @@ define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) { define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) { ; X86-LABEL: test_mm512_mask_broadcastq_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -686,7 +686,7 @@ define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) { ; X86-LABEL: test_mm512_maskz_broadcastq_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -714,7 +714,7 @@ define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a0) { define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) { ; X86-LABEL: test_mm512_mask_broadcastsd_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -733,7 +733,7 @@ define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) { ; X86-LABEL: test_mm512_maskz_broadcastsd_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -808,7 +808,7 @@ define <8 x double> @test_mm512_movedup_pd(<8 x double> %a0) { define <8 x double> @test_mm512_mask_movedup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) { ; X86-LABEL: test_mm512_mask_movedup_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6] ; X86-NEXT: retl @@ -827,7 +827,7 @@ define <8 x double> @test_mm512_mask_movedup_pd(<8 x double> %a0, i8 %a1, <8 x d define <8 x double> @test_mm512_maskz_movedup_pd(i8 %a0, <8 x double> %a1) { ; X86-LABEL: test_mm512_maskz_movedup_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6] ; X86-NEXT: retl @@ -949,7 +949,7 @@ define <8 x double> @test_mm512_permute_pd(<8 x double> %a0) { define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) { ; X86-LABEL: test_mm512_mask_permute_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6] ; X86-NEXT: retl @@ -968,7 +968,7 @@ define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x d define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) { ; X86-LABEL: test_mm512_maskz_permute_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6] ; X86-NEXT: retl @@ -1043,7 +1043,7 @@ define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) { define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) { ; X86-LABEL: test_mm512_mask_permutex_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4] ; X86-NEXT: retl @@ -1062,7 +1062,7 @@ define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64 define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) { ; X86-LABEL: test_mm512_maskz_permutex_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] ; X86-NEXT: retl @@ -1090,7 +1090,7 @@ define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) { define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) { ; X86-LABEL: test_mm512_mask_permutex_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4] ; X86-NEXT: retl @@ -1109,7 +1109,7 @@ define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) { ; X86-LABEL: test_mm512_maskz_permutex_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] ; X86-NEXT: retl @@ -1191,7 +1191,7 @@ define <8 x double> @test_mm512_shuffle_pd(<8 x double> %a0, <8 x double> %a1) { define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) { ; X86-LABEL: test_mm512_mask_shuffle_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] ; X86-NEXT: retl @@ -1210,7 +1210,7 @@ define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x d define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) { ; X86-LABEL: test_mm512_maskz_shuffle_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; X86-NEXT: retl @@ -1295,7 +1295,7 @@ define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) { define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) { ; X86-LABEL: test_mm512_mask_unpackhi_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7] ; X86-NEXT: retl @@ -1314,7 +1314,7 @@ define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64 define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { ; X86-LABEL: test_mm512_maskz_unpackhi_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; X86-NEXT: retl @@ -1342,7 +1342,7 @@ define <8 x double> @test_mm512_unpackhi_pd(<8 x double> %a0, <8 x double> %a1) define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) { ; X86-LABEL: test_mm512_mask_unpackhi_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7] ; X86-NEXT: retl @@ -1361,7 +1361,7 @@ define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) { ; X86-LABEL: test_mm512_maskz_unpackhi_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] ; X86-NEXT: retl @@ -1493,7 +1493,7 @@ define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) { define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) { ; X86-LABEL: test_mm512_mask_unpacklo_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] ; X86-NEXT: retl @@ -1512,7 +1512,7 @@ define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64 define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { ; X86-LABEL: test_mm512_maskz_unpacklo_epi64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; X86-NEXT: retl @@ -1540,7 +1540,7 @@ define <8 x double> @test_mm512_unpacklo_pd(<8 x double> %a0, <8 x double> %a1) define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) { ; X86-LABEL: test_mm512_mask_unpacklo_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6] ; X86-NEXT: retl @@ -1559,7 +1559,7 @@ define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) { ; X86-LABEL: test_mm512_maskz_unpacklo_pd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] ; X86-NEXT: retl @@ -1692,7 +1692,7 @@ define <8 x i64> @test_mm512_mul_epi32(<8 x i64> %__A, <8 x i64> %__B) nounwind define <8 x i64> @test_mm512_maskz_mul_epi32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind { ; X86-LABEL: test_mm512_maskz_mul_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuldq %zmm0, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -1716,7 +1716,7 @@ entry: define <8 x i64> @test_mm512_mask_mul_epi32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind { ; X86-LABEL: test_mm512_mask_mul_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuldq %zmm0, %zmm1, %zmm2 {%k1} ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 @@ -1753,7 +1753,7 @@ define <8 x i64> @test_mm512_mul_epu32(<8 x i64> %__A, <8 x i64> %__B) nounwind define <8 x i64> @test_mm512_maskz_mul_epu32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind { ; X86-LABEL: test_mm512_maskz_mul_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -1775,7 +1775,7 @@ entry: define <8 x i64> @test_mm512_mask_mul_epu32(i8 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind { ; X86-LABEL: test_mm512_mask_mul_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 {%k1} ; X86-NEXT: vmovdqa64 %zmm2, %zmm0 @@ -1799,7 +1799,7 @@ entry: define <8 x double> @test_mm512_set1_epi8(i8 signext %d) nounwind { ; X86-LABEL: test_mm512_set1_epi8: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: vmovd %eax, %xmm0 ; X86-NEXT: vpbroadcastb %xmm0, %ymm0 ; X86-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 @@ -1987,7 +1987,7 @@ entry: define <8 x double> @test_mm512_mask_cvtps_pd(<8 x double> %__W, i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm512_mask_cvtps_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -2007,7 +2007,7 @@ entry: define <8 x double> @test_mm512_mask_cvtpslo_pd(<8 x double> %__W, i8 zeroext %__U, <16 x float> %__A) { ; X86-LABEL: test_mm512_mask_cvtpslo_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -2028,7 +2028,7 @@ entry: define <8 x double> @test_mm512_maskz_cvtps_pd(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm512_maskz_cvtps_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2pd %ymm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -2117,7 +2117,7 @@ entry: define <4 x i64> @test_mm512_mask_cvtepi64_epi32(<4 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_cvtepi64_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovqd %zmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -2139,7 +2139,7 @@ entry: define <4 x i64> @test_mm512_maskz_cvtepi64_epi32(i8 zeroext %__M, <8 x i64> %__A) { ; X86-LABEL: test_mm512_maskz_cvtepi64_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -2172,7 +2172,7 @@ entry: define <2 x i64> @test_mm512_mask_cvtepi64_epi16(<2 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_cvtepi64_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovqw %zmm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -2194,7 +2194,7 @@ entry: define <2 x i64> @test_mm512_maskz_cvtepi64_epi16(i8 zeroext %__M, <8 x i64> %__A) { ; X86-LABEL: test_mm512_maskz_cvtepi64_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -2294,7 +2294,7 @@ declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64 define <8 x i64> @test_mm512_mask_ternarylogic_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) { ; X86-LABEL: test_mm512_mask_ternarylogic_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -2314,7 +2314,7 @@ entry: define <8 x i64> @test_mm512_maskz_ternarylogic_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) { ; X86-LABEL: test_mm512_maskz_ternarylogic_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -2364,7 +2364,7 @@ declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, define <8 x double> @test_mm512_mask2_permutex2var_pd(<8 x double> %__A, <8 x i64> %__I, i8 zeroext %__U, <8 x double> %__B) { ; X86-LABEL: test_mm512_mask2_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ; X86-NEXT: vmovapd %zmm1, %zmm0 @@ -2415,7 +2415,7 @@ declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i define <8 x i64> @test_mm512_mask2_permutex2var_epi64(<8 x i64> %__A, <8 x i64> %__I, i8 zeroext %__U, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask2_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ; X86-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -2509,7 +2509,7 @@ entry: define <8 x double> @test_mm512_mask_permutex2var_pd(<8 x double> %__A, i8 zeroext %__U, <8 x i64> %__I, <8 x double> %__B) { ; X86-LABEL: test_mm512_mask_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -2529,7 +2529,7 @@ entry: define <8 x double> @test_mm512_maskz_permutex2var_pd(i8 zeroext %__U, <8 x double> %__A, <8 x i64> %__I, <8 x double> %__B) { ; X86-LABEL: test_mm512_maskz_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2pd %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -2612,7 +2612,7 @@ entry: define <8 x i64> @test_mm512_mask_permutex2var_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__I, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2q %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -2632,7 +2632,7 @@ entry: define <8 x i64> @test_mm512_maskz_permutex2var_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__I, <8 x i64> %__B) { ; X86-LABEL: test_mm512_maskz_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2q %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -2651,7 +2651,7 @@ entry: define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_add_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vaddss %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2676,7 +2676,7 @@ entry: define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_add_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2700,7 +2700,7 @@ entry: define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_add_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vaddsd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2725,7 +2725,7 @@ entry: define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_add_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2749,7 +2749,7 @@ entry: define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_sub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsubss %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2774,7 +2774,7 @@ entry: define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_sub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2798,7 +2798,7 @@ entry: define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_sub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsubsd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2823,7 +2823,7 @@ entry: define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_sub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2847,7 +2847,7 @@ entry: define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_mul_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmulss %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2872,7 +2872,7 @@ entry: define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_mul_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2896,7 +2896,7 @@ entry: define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_mul_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmulsd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2921,7 +2921,7 @@ entry: define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_mul_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2945,7 +2945,7 @@ entry: define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_div_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vdivss %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2970,7 +2970,7 @@ entry: define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_div_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2994,7 +2994,7 @@ entry: define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_div_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vdivsd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -3019,7 +3019,7 @@ entry: define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_div_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -3056,7 +3056,7 @@ declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double>, <8 x double>, define <8 x double> @test_mm512_mask_fmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; X86-NEXT: retl @@ -3076,7 +3076,7 @@ entry: define <8 x double> @test_mm512_mask3_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -3098,7 +3098,7 @@ entry: define <8 x double> @test_mm512_maskz_fmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -3136,7 +3136,7 @@ entry: define <8 x double> @test_mm512_mask_fmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; X86-NEXT: retl @@ -3157,7 +3157,7 @@ entry: define <8 x double> @test_mm512_maskz_fmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -3196,7 +3196,7 @@ entry: define <8 x double> @test_mm512_mask3_fnmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fnmadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -3219,7 +3219,7 @@ entry: define <8 x double> @test_mm512_maskz_fnmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fnmadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -3255,7 +3255,7 @@ entry: define <8 x double> @test_mm512_maskz_fnmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fnmsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -3287,7 +3287,7 @@ entry: define <8 x double> @test_mm512_mask_fmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) + zmm2 ; X86-NEXT: retl @@ -3307,7 +3307,7 @@ entry: define <8 x double> @test_mm512_mask3_fmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) + zmm2 ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -3329,7 +3329,7 @@ entry: define <8 x double> @test_mm512_maskz_fmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2 ; X86-NEXT: retl @@ -3367,7 +3367,7 @@ entry: define <8 x double> @test_mm512_mask_fmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) - zmm2 ; X86-NEXT: retl @@ -3388,7 +3388,7 @@ entry: define <8 x double> @test_mm512_maskz_fmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) - zmm2 ; X86-NEXT: retl @@ -3427,7 +3427,7 @@ entry: define <8 x double> @test_mm512_mask3_fnmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) + zmm2 ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -3450,7 +3450,7 @@ entry: define <8 x double> @test_mm512_maskz_fnmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213pd {{.*#+}} zmm0 {%k1} {z} = -(zmm1 * zmm0) + zmm2 ; X86-NEXT: retl @@ -3486,7 +3486,7 @@ entry: define <8 x double> @test_mm512_maskz_fnmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213pd {{.*#+}} zmm0 {%k1} {z} = -(zmm1 * zmm0) - zmm2 ; X86-NEXT: retl @@ -3984,7 +3984,7 @@ declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double define <8 x double> @test_mm512_mask_fmaddsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmaddsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; X86-NEXT: retl @@ -4004,7 +4004,7 @@ entry: define <8 x double> @test_mm512_mask3_fmaddsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmaddsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4026,7 +4026,7 @@ entry: define <8 x double> @test_mm512_maskz_fmaddsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmaddsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -4064,7 +4064,7 @@ entry: define <8 x double> @test_mm512_mask_fmsubadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmsubadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; X86-NEXT: retl @@ -4085,7 +4085,7 @@ entry: define <8 x double> @test_mm512_maskz_fmsubadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmsubadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -4119,7 +4119,7 @@ entry: define <8 x double> @test_mm512_mask_fmaddsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2 ; X86-NEXT: retl @@ -4142,7 +4142,7 @@ entry: define <8 x double> @test_mm512_mask3_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2 ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4167,7 +4167,7 @@ entry: define <8 x double> @test_mm512_maskz_fmaddsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2 ; X86-NEXT: retl @@ -4203,7 +4203,7 @@ entry: define <8 x double> @test_mm512_mask_fmsubadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd132pd {{.*#+}} zmm0 {%k1} = (zmm0 * zmm1) -/+ zmm2 ; X86-NEXT: retl @@ -4226,7 +4226,7 @@ entry: define <8 x double> @test_mm512_maskz_fmsubadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_maskz_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd213pd {{.*#+}} zmm0 {%k1} {z} = (zmm1 * zmm0) -/+ zmm2 ; X86-NEXT: retl @@ -4526,7 +4526,7 @@ entry: define <8 x double> @test_mm512_mask3_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4549,7 +4549,7 @@ entry: define <8 x double> @test_mm512_mask3_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) - zmm2 ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4618,7 +4618,7 @@ entry: define <8 x double> @test_mm512_mask3_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsubadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4641,7 +4641,7 @@ entry: define <8 x double> @test_mm512_mask3_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd231pd {{.*#+}} zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2 ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4714,7 +4714,7 @@ entry: define <8 x double> @test_mm512_mask_fnmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmadd_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; X86-NEXT: retl @@ -4735,7 +4735,7 @@ entry: define <8 x double> @test_mm512_mask_fnmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) + zmm2 ; X86-NEXT: retl @@ -4798,7 +4798,7 @@ entry: define <8 x double> @test_mm512_mask_fnmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ; X86-NEXT: retl @@ -4820,7 +4820,7 @@ entry: define <8 x double> @test_mm512_mask3_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fnmsub_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4844,7 +4844,7 @@ entry: define <8 x double> @test_mm512_mask_fnmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) { ; X86-LABEL: test_mm512_mask_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub132pd {{.*#+}} zmm0 {%k1} = -(zmm0 * zmm1) - zmm2 ; X86-NEXT: retl @@ -4866,7 +4866,7 @@ entry: define <8 x double> @test_mm512_mask3_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm512_mask3_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231pd {{.*#+}} zmm2 {%k1} = -(zmm0 * zmm1) - zmm2 ; X86-NEXT: vmovapd %zmm2, %zmm0 @@ -4982,7 +4982,7 @@ entry: define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmadd_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -5008,7 +5008,7 @@ entry: define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmadd_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -5035,7 +5035,7 @@ declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32) #1 define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213ss {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -5060,7 +5060,7 @@ entry: define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -5085,7 +5085,7 @@ entry: define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231ss {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5113,7 +5113,7 @@ entry: define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5140,7 +5140,7 @@ entry: define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmsub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213ss {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -5167,7 +5167,7 @@ entry: define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fmsub_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -5193,7 +5193,7 @@ entry: define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213ss {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -5219,7 +5219,7 @@ entry: define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -5245,7 +5245,7 @@ entry: define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231ss {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5274,7 +5274,7 @@ entry: define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5302,7 +5302,7 @@ entry: define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -5329,7 +5329,7 @@ entry: define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -5355,7 +5355,7 @@ entry: define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213ss {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -5381,7 +5381,7 @@ entry: define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -5407,7 +5407,7 @@ entry: define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5436,7 +5436,7 @@ entry: define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5464,7 +5464,7 @@ entry: define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213ss {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -5492,7 +5492,7 @@ entry: define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -5519,7 +5519,7 @@ entry: define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213ss {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -5546,7 +5546,7 @@ entry: define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213ss {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -5573,7 +5573,7 @@ entry: define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231ss {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5603,7 +5603,7 @@ entry: define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_round_ss: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231ss {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5632,7 +5632,7 @@ entry: define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmadd_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213sd {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -5658,7 +5658,7 @@ entry: define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmadd_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -5685,7 +5685,7 @@ declare double @llvm.x86.avx512.vfmadd.f64(double, double, double, i32) #1 define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213sd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -5710,7 +5710,7 @@ entry: define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -5735,7 +5735,7 @@ entry: define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231sd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5763,7 +5763,7 @@ entry: define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5790,7 +5790,7 @@ entry: define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmsub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213sd {{.*#+}} xmm0 {%k1} = (xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -5817,7 +5817,7 @@ entry: define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fmsub_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -5843,7 +5843,7 @@ entry: define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213sd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -5869,7 +5869,7 @@ entry: define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -5895,7 +5895,7 @@ entry: define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231sd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5924,7 +5924,7 @@ entry: define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5952,7 +5952,7 @@ entry: define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213sd {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -5979,7 +5979,7 @@ entry: define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmadd_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -6005,7 +6005,7 @@ entry: define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213sd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -6031,7 +6031,7 @@ entry: define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6057,7 +6057,7 @@ entry: define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231sd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -6086,7 +6086,7 @@ entry: define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -6114,7 +6114,7 @@ entry: define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213sd {{.*#+}} xmm0 {%k1} = -(xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -6142,7 +6142,7 @@ entry: define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_fnmsub_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -6169,7 +6169,7 @@ entry: define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213sd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -6196,7 +6196,7 @@ entry: define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6223,7 +6223,7 @@ entry: define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231sd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -6253,7 +6253,7 @@ entry: define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_round_sd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1} ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -6283,7 +6283,7 @@ define <8 x i64> @test_mm512_mask_expandloadu_epi64(<8 x i64> %__W, i8 zeroext % ; X86-LABEL: test_mm512_mask_expandloadu_epi64: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} ; X86-NEXT: retl @@ -6303,7 +6303,7 @@ define <8 x i64> @test_mm512_maskz_expandloadu_epi64(i8 zeroext %__U, ptr readon ; X86-LABEL: test_mm512_maskz_expandloadu_epi64: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandq (%eax), %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -6323,7 +6323,7 @@ define <8 x double> @test_mm512_mask_expandloadu_pd(<8 x double> %__W, i8 zeroex ; X86-LABEL: test_mm512_mask_expandloadu_pd: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} ; X86-NEXT: retl @@ -6343,7 +6343,7 @@ define <8 x double> @test_mm512_maskz_expandloadu_pd(i8 zeroext %__U, ptr readon ; X86-LABEL: test_mm512_maskz_expandloadu_pd: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandpd (%eax), %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -6445,7 +6445,7 @@ entry: define void @test_mm512_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <8 x double> %__A) { ; X86-LABEL: test_mm512_mask_compressstoreu_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcompresspd %zmm0, (%ecx) {%k1} @@ -6467,7 +6467,7 @@ entry: define void @test_mm512_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_compressstoreu_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpcompressq %zmm0, (%ecx) {%k1} @@ -6728,7 +6728,7 @@ entry: define i64 @test_mm512_mask_reduce_add_epi64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_add_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1 @@ -6773,7 +6773,7 @@ entry: define i64 @test_mm512_mask_reduce_mul_epi64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_mul_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0] ; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} @@ -6862,7 +6862,7 @@ entry: define i64 @test_mm512_mask_reduce_and_epi64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_and_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} @@ -6909,7 +6909,7 @@ entry: define i64 @test_mm512_mask_reduce_or_epi64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_or_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: vextracti64x4 $1, %zmm0, %ymm1 @@ -7507,7 +7507,7 @@ define double @test_mm512_mask_reduce_add_pd(i8 zeroext %__M, <8 x double> %__W) ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movzbl 8(%ebp), %eax +; X86-NEXT: movb 8(%ebp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: vextractf64x4 $1, %zmm0, %ymm1 @@ -7561,7 +7561,7 @@ define double @test_mm512_mask_reduce_mul_pd(i8 zeroext %__M, <8 x double> %__W) ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movzbl 8(%ebp), %eax +; X86-NEXT: movb 8(%ebp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] ; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1} @@ -7976,7 +7976,7 @@ entry: define i64 @test_mm512_mask_reduce_max_epi64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_max_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648,0,2147483648] ; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} @@ -8024,7 +8024,7 @@ entry: define i64 @test_mm512_mask_reduce_max_epu64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_max_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,2,3] @@ -8077,7 +8077,7 @@ define double @test_mm512_mask_reduce_max_pd(i8 zeroext %__M, <8 x double> %__W) ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movzbl 8(%ebp), %eax +; X86-NEXT: movb 8(%ebp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf,-Inf] ; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1} @@ -8126,7 +8126,7 @@ entry: define i64 @test_mm512_mask_reduce_min_epi64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_min_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647,4294967295,2147483647] ; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} @@ -8174,7 +8174,7 @@ entry: define i64 @test_mm512_mask_reduce_min_epu64(i8 zeroext %__M, <8 x i64> %__W) { ; X86-LABEL: test_mm512_mask_reduce_min_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; X86-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1} @@ -8229,7 +8229,7 @@ define double @test_mm512_mask_reduce_min_pd(i8 zeroext %__M, <8 x double> %__W) ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movzbl 8(%ebp), %eax +; X86-NEXT: movb 8(%ebp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd {{.*#+}} zmm1 = [+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf,+Inf] ; X86-NEXT: vmovapd %zmm0, %zmm1 {%k1} @@ -8898,7 +8898,7 @@ entry: define <8 x double> @test_mm512_mask_max_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_mask_max_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -8918,7 +8918,7 @@ entry: define <8 x double> @test_mm512_maskz_max_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_maskz_max_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -8958,7 +8958,7 @@ entry: define <8 x double> @test_mm512_mask_max_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_mask_max_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -8980,7 +8980,7 @@ declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32 define <8 x double> @test_mm512_maskz_max_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_maskz_max_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9082,7 +9082,7 @@ entry: define <8 x double> @test_mm512_mask_min_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_mask_min_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9102,7 +9102,7 @@ entry: define <8 x double> @test_mm512_maskz_min_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_maskz_min_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9122,7 +9122,7 @@ entry: define <8 x double> @test_mm512_mask_min_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_mask_min_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9144,7 +9144,7 @@ declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32 define <8 x double> @test_mm512_maskz_min_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { ; X86-LABEL: test_mm512_maskz_min_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vminpd %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9276,7 +9276,7 @@ entry: define <8 x double> @test_mm512_mask_sqrt_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A) { ; X86-LABEL: test_mm512_mask_sqrt_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9296,7 +9296,7 @@ entry: define <8 x double> @test_mm512_maskz_sqrt_pd(i8 zeroext %__U, <8 x double> %__A) { ; X86-LABEL: test_mm512_maskz_sqrt_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9316,7 +9316,7 @@ entry: define <8 x double> @test_mm512_mask_sqrt_round_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A) { ; X86-LABEL: test_mm512_mask_sqrt_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd {rn-sae}, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9338,7 +9338,7 @@ declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) define <8 x double> @test_mm512_maskz_sqrt_round_pd(i8 zeroext %__U, <8 x double> %__A) { ; X86-LABEL: test_mm512_maskz_sqrt_round_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd {rn-sae}, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9537,7 +9537,7 @@ entry: define <8 x i64> @test_mm512_mask_rol_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_rol_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolq $5, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9557,7 +9557,7 @@ entry: define <8 x i64> @test_mm512_maskz_rol_epi64(i8 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_maskz_rol_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolq $5, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9647,7 +9647,7 @@ entry: define <8 x i64> @test_mm512_mask_rolv_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_rolv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvq %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9667,7 +9667,7 @@ entry: define <8 x i64> @test_mm512_maskz_rolv_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_maskz_rolv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9755,7 +9755,7 @@ entry: define <8 x i64> @test_mm512_mask_ror_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_mask_ror_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorq $5, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9775,7 +9775,7 @@ entry: define <8 x i64> @test_mm512_maskz_ror_epi64(i8 zeroext %__U, <8 x i64> %__A) { ; X86-LABEL: test_mm512_maskz_ror_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorq $5, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -9865,7 +9865,7 @@ entry: define <8 x i64> @test_mm512_mask_rorv_epi64(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_rorv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvq %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -9885,7 +9885,7 @@ entry: define <8 x i64> @test_mm512_maskz_rorv_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_maskz_rorv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index b0be8c55711b93..cd48815a5cfc09 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -3330,7 +3330,7 @@ define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> % define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_move_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3349,7 +3349,7 @@ entry: define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_move_ss: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3367,7 +3367,7 @@ entry: define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_move_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2] ; X86-NEXT: retl ## encoding: [0xc3] @@ -3385,7 +3385,7 @@ entry: define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_move_sd: ; X86: ## %bb.0: ## %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1] ; X86-NEXT: retl ## encoding: [0xc3] @@ -6771,7 +6771,7 @@ define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) { ; X86: ## %bb.0: ; X86-NEXT: vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1] ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8] ; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -6857,7 +6857,7 @@ define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2 ; X86: ## %bb.0: ; X86-NEXT: vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1] ; X86-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: andb %cl, %al ## encoding: [0x20,0xc8] ; X86-NEXT: addb %cl, %al ## encoding: [0x00,0xc8] ; X86-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -9904,7 +9904,7 @@ declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8] ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xda] @@ -9941,7 +9941,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8] ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xda] @@ -9978,7 +9978,7 @@ declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double> define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8] ; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda] @@ -10007,7 +10007,7 @@ declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, < define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2] ; X86-NEXT: ## xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 @@ -10029,7 +10029,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double> define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda] ; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd9] @@ -10066,7 +10066,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, < define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda] ; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd9] @@ -10101,7 +10101,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x flo define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) { ; X86-LABEL: fmadd_ss_mask_memfold: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04] ; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02] @@ -10149,7 +10149,7 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) { define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) { ; X86-LABEL: fmadd_ss_maskz_memfold: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04] ; X86-NEXT: vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02] @@ -10193,7 +10193,7 @@ define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) { define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) { ; X86-LABEL: fmadd_sd_mask_memfold: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04] ; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02] @@ -10237,7 +10237,7 @@ define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) { define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) { ; X86-LABEL: fmadd_sd_maskz_memfold: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04] ; X86-NEXT: vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02] @@ -10279,7 +10279,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double> define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda] ; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd9] @@ -10316,7 +10316,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, < define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda] ; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd9] @@ -10353,7 +10353,7 @@ declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda] ; X86-NEXT: vfnmsub231sd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd9] @@ -10390,7 +10390,7 @@ declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){ ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] ; X86-NEXT: vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda] ; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd9] @@ -10426,7 +10426,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08] ; X86-NEXT: ## xmm1 {%k1} = (xmm0 * mem) + xmm1 @@ -10450,7 +10450,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x f ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08] +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9] ; X86-NEXT: vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00] ; X86-NEXT: ## xmm0 {%k1} = (xmm0 * mem) + xmm1 diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 3339549e5c4268..3a19945eb5f17f 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -1099,7 +1099,7 @@ define void @test_mask_store_ss(ptr %ptr, <4 x float> %data, i8 %mask) { ; X86-LABEL: test_mask_store_ss: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vmovss %xmm0, (%eax) {%k1} ; X86-NEXT: retl @@ -5735,7 +5735,7 @@ define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x d ; ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovapd %xmm0, %xmm3 ; X86-NEXT: vfmadd213sd {{.*#+}} xmm3 {%k1} = (xmm1 * xmm3) + xmm2 @@ -5786,7 +5786,7 @@ define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x flo ; ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovaps %xmm0, %xmm3 ; X86-NEXT: vfmadd213ss {{.*#+}} xmm3 {%k1} = (xmm1 * xmm3) + xmm2 @@ -5834,7 +5834,7 @@ define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x d ; ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovapd %xmm0, %xmm3 ; X86-NEXT: vfmadd213sd {{.*#+}} xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2 @@ -5876,7 +5876,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x flo ; ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovaps %xmm0, %xmm3 ; X86-NEXT: vfmadd213ss {{.*#+}} xmm3 {%k1} {z} = (xmm1 * xmm3) + xmm2 @@ -5916,7 +5916,7 @@ define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_load0(i8 zeroext %0, ptr ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_load0: ; X86: # %bb.0: ; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: vmovaps (%ecx), %xmm0 ; X86-NEXT: kmovw %eax, %k1 @@ -5947,7 +5947,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x ; ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovapd %xmm2, %xmm3 ; X86-NEXT: vfmadd231sd {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) + xmm3 @@ -5998,7 +5998,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x fl ; ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovaps %xmm2, %xmm3 ; X86-NEXT: vfmadd231ss {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) + xmm3 @@ -6047,7 +6047,7 @@ define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) { ; ; X86-LABEL: fmadd_ss_mask_memfold: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -6093,7 +6093,7 @@ define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) { ; ; X86-LABEL: fmadd_ss_maskz_memfold: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero @@ -6139,7 +6139,7 @@ define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) { ; ; X86-LABEL: fmadd_sd_mask_memfold: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero @@ -6181,7 +6181,7 @@ define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) { ; ; X86-LABEL: fmadd_sd_maskz_memfold: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero @@ -6225,7 +6225,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x ; ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovapd %xmm2, %xmm3 ; X86-NEXT: vfmsub231sd {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) - xmm3 @@ -6282,7 +6282,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x fl ; ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovaps %xmm2, %xmm3 ; X86-NEXT: vfmsub231ss {{.*#+}} xmm3 {%k1} = (xmm0 * xmm1) - xmm3 @@ -6339,7 +6339,7 @@ define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x ; ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovapd %xmm2, %xmm3 ; X86-NEXT: vfnmsub231sd {{.*#+}} xmm3 {%k1} = -(xmm0 * xmm1) - xmm3 @@ -6399,7 +6399,7 @@ define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x f ; ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovaps %xmm2, %xmm3 ; X86-NEXT: vfnmsub231ss {{.*#+}} xmm3 {%k1} = -(xmm0 * xmm1) - xmm3 @@ -6455,7 +6455,7 @@ define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vfmadd231ss {{.*#+}} xmm1 {%k1} = (xmm0 * mem) + xmm1 ; X86-NEXT: vmovaps %xmm1, %xmm0 @@ -6483,7 +6483,7 @@ define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x f ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vfmadd132ss {{.*#+}} xmm0 {%k1} = (xmm0 * mem) + xmm1 ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512-load-store.ll b/llvm/test/CodeGen/X86/avx512-load-store.ll index c32c3d9b855039..d294855a909d66 100644 --- a/llvm/test/CodeGen/X86/avx512-load-store.ll +++ b/llvm/test/CodeGen/X86/avx512-load-store.ll @@ -13,7 +13,7 @@ define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x ; ; CHECK32-LABEL: test_mm_mask_move_ss: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK32-NEXT: kmovw %eax, %k1 ; CHECK32-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1} ; CHECK32-NEXT: retl @@ -36,7 +36,7 @@ define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 ; ; CHECK32-LABEL: test_mm_maskz_move_ss: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK32-NEXT: kmovw %eax, %k1 ; CHECK32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK32-NEXT: retl @@ -58,7 +58,7 @@ define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 ; ; CHECK32-LABEL: test_mm_mask_move_sd: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK32-NEXT: kmovw %eax, %k1 ; CHECK32-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1} ; CHECK32-NEXT: retl @@ -81,7 +81,7 @@ define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, < ; ; CHECK32-LABEL: test_mm_maskz_move_sd: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK32-NEXT: kmovw %eax, %k1 ; CHECK32-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ; CHECK32-NEXT: retl @@ -127,7 +127,7 @@ define void @test_mm_mask_store_sd(ptr %__W, i8 zeroext %__U, <2 x double> %__A) ; CHECK32-LABEL: test_mm_mask_store_sd: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovsd %xmm0, (%eax) {%k1} ; CHECK32-NEXT: retl @@ -174,7 +174,7 @@ define <2 x double> @test_mm_mask_load_sd(<2 x double> %__A, i8 zeroext %__U, pt ; CHECK32-LABEL: test_mm_mask_load_sd: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} ; CHECK32-NEXT: retl @@ -221,7 +221,7 @@ define <2 x double> @test_mm_maskz_load_sd(i8 zeroext %__U, ptr %__W) local_unna ; CHECK32-LABEL: test_mm_maskz_load_sd: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} {z} ; CHECK32-NEXT: retl @@ -245,7 +245,7 @@ define void @test_mm_mask_store_ss_2(ptr %__P, i8 zeroext %__U, <4 x float> %__A ; CHECK32-LABEL: test_mm_mask_store_ss_2: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovss %xmm0, (%eax) {%k1} ; CHECK32-NEXT: retl @@ -267,7 +267,7 @@ define void @test_mm_mask_store_sd_2(ptr %__P, i8 zeroext %__U, <2 x double> %__ ; CHECK32-LABEL: test_mm_mask_store_sd_2: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovsd %xmm0, (%eax) {%k1} ; CHECK32-NEXT: retl @@ -289,7 +289,7 @@ define <4 x float> @test_mm_mask_load_ss_2(<4 x float> %__A, i8 zeroext %__U, pt ; CHECK32-LABEL: test_mm_mask_load_ss_2: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1} ; CHECK32-NEXT: retl @@ -312,7 +312,7 @@ define <4 x float> @test_mm_maskz_load_ss_2(i8 zeroext %__U, ptr readonly %__W) ; CHECK32-LABEL: test_mm_maskz_load_ss_2: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1} {z} ; CHECK32-NEXT: retl @@ -334,7 +334,7 @@ define <2 x double> @test_mm_mask_load_sd_2(<2 x double> %__A, i8 zeroext %__U, ; CHECK32-LABEL: test_mm_mask_load_sd_2: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} ; CHECK32-NEXT: retl @@ -357,7 +357,7 @@ define <2 x double> @test_mm_maskz_load_sd_2(i8 zeroext %__U, ptr readonly %__W) ; CHECK32-LABEL: test_mm_maskz_load_sd_2: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: kmovw %ecx, %k1 ; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} {z} ; CHECK32-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll b/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll index 38179d9fcf68d5..38fa44b346729a 100644 --- a/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll +++ b/llvm/test/CodeGen/X86/avx512-load-trunc-store-i1.ll @@ -13,7 +13,7 @@ define void @load_v1i2_trunc_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512-ONLY-LABEL: load_v1i2_trunc_v1i1_store: ; AVX512-ONLY: # %bb.0: -; AVX512-ONLY-NEXT: movzbl (%rdi), %eax +; AVX512-ONLY-NEXT: movb (%rdi), %al ; AVX512-ONLY-NEXT: andl $1, %eax ; AVX512-ONLY-NEXT: kmovw %eax, %k0 ; AVX512-ONLY-NEXT: kmovw %k0, %eax @@ -35,7 +35,7 @@ define void @load_v1i3_trunc_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512-ONLY-LABEL: load_v1i3_trunc_v1i1_store: ; AVX512-ONLY: # %bb.0: -; AVX512-ONLY-NEXT: movzbl (%rdi), %eax +; AVX512-ONLY-NEXT: movb (%rdi), %al ; AVX512-ONLY-NEXT: andl $1, %eax ; AVX512-ONLY-NEXT: kmovw %eax, %k0 ; AVX512-ONLY-NEXT: kmovw %k0, %eax @@ -57,7 +57,7 @@ define void @load_v1i4_trunc_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512-ONLY-LABEL: load_v1i4_trunc_v1i1_store: ; AVX512-ONLY: # %bb.0: -; AVX512-ONLY-NEXT: movzbl (%rdi), %eax +; AVX512-ONLY-NEXT: movb (%rdi), %al ; AVX512-ONLY-NEXT: andl $1, %eax ; AVX512-ONLY-NEXT: kmovw %eax, %k0 ; AVX512-ONLY-NEXT: kmovw %k0, %eax @@ -79,7 +79,7 @@ define void @load_v1i8_trunc_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512-ONLY-LABEL: load_v1i8_trunc_v1i1_store: ; AVX512-ONLY: # %bb.0: -; AVX512-ONLY-NEXT: movzbl (%rdi), %eax +; AVX512-ONLY-NEXT: movb (%rdi), %al ; AVX512-ONLY-NEXT: andl $1, %eax ; AVX512-ONLY-NEXT: kmovw %eax, %k0 ; AVX512-ONLY-NEXT: kmovw %k0, %eax @@ -101,7 +101,7 @@ define void @load_v1i16_trunc_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512-ONLY-LABEL: load_v1i16_trunc_v1i1_store: ; AVX512-ONLY: # %bb.0: -; AVX512-ONLY-NEXT: movzbl (%rdi), %eax +; AVX512-ONLY-NEXT: movb (%rdi), %al ; AVX512-ONLY-NEXT: andl $1, %eax ; AVX512-ONLY-NEXT: kmovw %eax, %k0 ; AVX512-ONLY-NEXT: kmovw %k0, %eax @@ -123,7 +123,7 @@ define void @load_v1i32_trunc_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512-ONLY-LABEL: load_v1i32_trunc_v1i1_store: ; AVX512-ONLY: # %bb.0: -; AVX512-ONLY-NEXT: movzbl (%rdi), %eax +; AVX512-ONLY-NEXT: movb (%rdi), %al ; AVX512-ONLY-NEXT: andl $1, %eax ; AVX512-ONLY-NEXT: kmovw %eax, %k0 ; AVX512-ONLY-NEXT: kmovw %k0, %eax @@ -145,7 +145,7 @@ define void @load_v1i64_trunc_v1i1_store(ptr %a0,ptr %a1) { ; ; AVX512-ONLY-LABEL: load_v1i64_trunc_v1i1_store: ; AVX512-ONLY: # %bb.0: -; AVX512-ONLY-NEXT: movzbl (%rdi), %eax +; AVX512-ONLY-NEXT: movb (%rdi), %al ; AVX512-ONLY-NEXT: andl $1, %eax ; AVX512-ONLY-NEXT: kmovw %eax, %k0 ; AVX512-ONLY-NEXT: kmovw %k0, %eax diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index f9d6ac8e9db12d..53d681dbcf8693 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -55,7 +55,7 @@ define i8 @mask8(i8 %x) { ; ; X86-LABEL: mask8: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: notb %al ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> @@ -73,7 +73,7 @@ define i32 @mask8_zext(i8 %x) { ; ; X86-LABEL: mask8_zext: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: notb %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: retl @@ -277,7 +277,7 @@ define i8 @shuf_test1(i16 %v) nounwind { ; ; X86-LABEL: shuf_test1: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %v1 = bitcast i16 %v to <16 x i1> %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> @@ -1996,7 +1996,7 @@ define void @store_i8_i1(i8 %x, ptr%y) { ; X86-LABEL: store_i8_i1: ; X86: ## %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $1, %cl ; X86-NEXT: movb %cl, (%eax) ; X86-NEXT: retl @@ -2951,7 +2951,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k2 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $10, %k1, %k1 @@ -2960,7 +2960,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $9, %k1, %k1 @@ -2970,7 +2970,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k3 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $8, %k1, %k1 @@ -2979,7 +2979,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $7, %k1, %k1 @@ -2989,7 +2989,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kandw %k1, %k0, %k0 ; KNL-NEXT: kmovw %k1, %k4 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $6, %k1, %k1 @@ -2998,7 +2998,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $5, %k1, %k1 @@ -3006,7 +3006,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: movw $-2049, %ax ## imm = 0xF7FF ; KNL-NEXT: kmovw %eax, %k5 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $4, %k1, %k1 @@ -3015,7 +3015,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $3, %k1, %k1 @@ -3024,7 +3024,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kshiftlw $15, %k1, %k1 ; KNL-NEXT: kshiftrw $2, %k1, %k1 @@ -3033,121 +3033,121 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: kmovw %eax, %k1 ; KNL-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $8, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $7, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $6, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $4, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $3, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $2, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k0 ; KNL-NEXT: kshiftlw $15, %k0, %k0 ; KNL-NEXT: kshiftrw $14, %k0, %k0 @@ -3155,97 +3155,97 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: korw %k0, %k6, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; KNL-NEXT: kandw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $13, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k7, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $12, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $11, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $10, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $9, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $8, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $7, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; KNL-NEXT: kandw %k3, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $6, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k4, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $4, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $3, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; KNL-NEXT: kandw %k2, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $2, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $14, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 ; KNL-NEXT: kshiftlw $1, %k0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: korw %k6, %k0, %k0 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: andl $1, %eax -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl ; KNL-NEXT: kmovw %ecx, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $14, %k6, %k6 @@ -3253,93 +3253,93 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; KNL-NEXT: korw %k6, %k7, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $13, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $12, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $11, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $10, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $9, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $8, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; KNL-NEXT: kandw %k5, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $7, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k3, %k6, %k6 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k7 ; KNL-NEXT: kshiftlw $15, %k7, %k7 ; KNL-NEXT: kshiftrw $6, %k7, %k7 ; KNL-NEXT: korw %k7, %k6, %k6 ; KNL-NEXT: kandw %k4, %k6, %k5 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k6 ; KNL-NEXT: kshiftlw $15, %k6, %k6 ; KNL-NEXT: kshiftrw $5, %k6, %k6 ; KNL-NEXT: korw %k6, %k5, %k5 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; KNL-NEXT: kandw %k3, %k5, %k4 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k5 ; KNL-NEXT: kshiftlw $15, %k5, %k5 ; KNL-NEXT: kshiftrw $4, %k5, %k5 ; KNL-NEXT: korw %k5, %k4, %k4 ; KNL-NEXT: kandw %k1, %k4, %k3 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k4 ; KNL-NEXT: kshiftlw $15, %k4, %k4 ; KNL-NEXT: kshiftrw $3, %k4, %k4 ; KNL-NEXT: korw %k4, %k3, %k3 ; KNL-NEXT: kandw %k2, %k3, %k2 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k3 ; KNL-NEXT: kshiftlw $15, %k3, %k3 ; KNL-NEXT: kshiftrw $2, %k3, %k3 ; KNL-NEXT: korw %k3, %k2, %k2 ; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; KNL-NEXT: kandw %k1, %k2, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kshiftlw $14, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 ; KNL-NEXT: kshiftlw $1, %k1, %k1 ; KNL-NEXT: kshiftrw $1, %k1, %k1 -; KNL-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; KNL-NEXT: movb {{[0-9]+}}(%rsp), %al ; KNL-NEXT: kmovw %eax, %k2 ; KNL-NEXT: kshiftlw $15, %k2, %k2 ; KNL-NEXT: korw %k2, %k1, %k1 @@ -3405,7 +3405,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k1, %k2 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $10, %k1, %k1 @@ -3414,7 +3414,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $9, %k1, %k1 @@ -3424,7 +3424,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k1, %k3 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $8, %k1, %k1 @@ -3433,7 +3433,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $7, %k1, %k1 @@ -3443,7 +3443,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k1, %k4 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $6, %k1, %k1 @@ -3452,7 +3452,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $5, %k1, %k1 @@ -3460,7 +3460,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: movw $-2049, %ax ## imm = 0xF7FF ; AVX512DQ-NEXT: kmovw %eax, %k5 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $4, %k1, %k1 @@ -3469,7 +3469,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $3, %k1, %k1 @@ -3478,7 +3478,7 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kshiftlw $15, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $2, %k1, %k1 @@ -3487,121 +3487,121 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: kmovw %eax, %k1 ; AVX512DQ-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: andl $1, %eax -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl ; AVX512DQ-NEXT: kmovw %ecx, %k0 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: korw %k0, %k6, %k0 ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: andl $1, %eax -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl ; AVX512DQ-NEXT: kmovw %ecx, %k0 ; AVX512DQ-NEXT: kshiftlw $15, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $14, %k0, %k0 @@ -3609,97 +3609,97 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: korw %k0, %k6, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $13, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k7, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $12, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $11, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $10, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $9, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $8, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $7, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k3, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $6, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k4, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $4, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $3, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k2, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $2, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $14, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 ; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0 ; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k0, %k0 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: andl $1, %eax -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %cl ; AVX512DQ-NEXT: kmovw %ecx, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $14, %k6, %k6 @@ -3707,93 +3707,93 @@ define void @store_64i1(ptr %a, <64 x i1> %v) { ; AVX512DQ-NEXT: korw %k6, %k7, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $13, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $12, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $11, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $10, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $9, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $8, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k5, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $7, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kandw %k3, %k6, %k6 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k7 ; AVX512DQ-NEXT: kshiftlw $15, %k7, %k7 ; AVX512DQ-NEXT: kshiftrw $6, %k7, %k7 ; AVX512DQ-NEXT: korw %k7, %k6, %k6 ; AVX512DQ-NEXT: kandw %k4, %k6, %k5 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k6 ; AVX512DQ-NEXT: kshiftlw $15, %k6, %k6 ; AVX512DQ-NEXT: kshiftrw $5, %k6, %k6 ; AVX512DQ-NEXT: korw %k6, %k5, %k5 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k3, %k5, %k4 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k5 ; AVX512DQ-NEXT: kshiftlw $15, %k5, %k5 ; AVX512DQ-NEXT: kshiftrw $4, %k5, %k5 ; AVX512DQ-NEXT: korw %k5, %k4, %k4 ; AVX512DQ-NEXT: kandw %k1, %k4, %k3 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k4 ; AVX512DQ-NEXT: kshiftlw $15, %k4, %k4 ; AVX512DQ-NEXT: kshiftrw $3, %k4, %k4 ; AVX512DQ-NEXT: korw %k4, %k3, %k3 ; AVX512DQ-NEXT: kandw %k2, %k3, %k2 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k3 ; AVX512DQ-NEXT: kshiftlw $15, %k3, %k3 ; AVX512DQ-NEXT: kshiftrw $2, %k3, %k3 ; AVX512DQ-NEXT: korw %k3, %k2, %k2 ; AVX512DQ-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload ; AVX512DQ-NEXT: kandw %k1, %k2, %k1 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k2 ; AVX512DQ-NEXT: kshiftlw $14, %k2, %k2 ; AVX512DQ-NEXT: korw %k2, %k1, %k1 ; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1 ; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1 -; AVX512DQ-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; AVX512DQ-NEXT: movb {{[0-9]+}}(%rsp), %al ; AVX512DQ-NEXT: kmovw %eax, %k2 ; AVX512DQ-NEXT: kshiftlw $15, %k2, %k2 ; AVX512DQ-NEXT: korw %k2, %k1, %k1 @@ -3960,7 +3960,7 @@ define i8 @test_v8i1_add(i8 %x, i8 %y) { ; ; X86-LABEL: test_v8i1_add: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> @@ -3980,7 +3980,7 @@ define i8 @test_v8i1_sub(i8 %x, i8 %y) { ; ; X86-LABEL: test_v8i1_sub: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> @@ -4000,7 +4000,7 @@ define i8 @test_v8i1_mul(i8 %x, i8 %y) { ; ; X86-LABEL: test_v8i1_mul: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %m0 = bitcast i8 %x to <8 x i1> @@ -5132,7 +5132,7 @@ define i1 @test_v1i1_add(i1 %x, i1 %y) { ; ; X86-LABEL: test_v1i1_add: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %m0 = bitcast i1 %x to <1 x i1> @@ -5152,7 +5152,7 @@ define i1 @test_v1i1_sub(i1 %x, i1 %y) { ; ; X86-LABEL: test_v1i1_sub: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %m0 = bitcast i1 %x to <1 x i1> @@ -5172,7 +5172,7 @@ define i1 @test_v1i1_mul(i1 %x, i1 %y) { ; ; X86-LABEL: test_v1i1_mul: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl %m0 = bitcast i1 %x to <1 x i1> diff --git a/llvm/test/CodeGen/X86/avx512-select.ll b/llvm/test/CodeGen/X86/avx512-select.ll index 4c7633f48c69dc..60bc63275bd471 100644 --- a/llvm/test/CodeGen/X86/avx512-select.ll +++ b/llvm/test/CodeGen/X86/avx512-select.ll @@ -131,7 +131,7 @@ define <16 x double> @select04(<16 x double> %a, <16 x double> %b) { define i8 @select05(i8 %a.0, i8 %m) { ; X86-LABEL: select05: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: orb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; @@ -206,7 +206,7 @@ define i8 @select05_mem(ptr %a.0, ptr %m) { define i8 @select06(i8 %a.0, i8 %m) { ; X86-LABEL: select06: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll index 170197816ae199..b497ff7739d2cb 100644 --- a/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bf16-vl-intrinsics.ll @@ -321,7 +321,7 @@ entry: define <4 x float> @test_mm128_maskz_dpbf16ps_128(<4 x float> %E, <4 x i32> %A, <4 x i32> %B, i4 zeroext %U) local_unnamed_addr #2 { ; X86-LABEL: test_mm128_maskz_dpbf16ps_128: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x76,0x89,0x52,0xc2] ; X86-NEXT: retl # encoding: [0xc3] @@ -340,7 +340,7 @@ entry: define <4 x float> @test_mm128_mask_dpbf16ps_128(i4 zeroext %U, <4 x float> %E, <4 x i32> %A, <4 x i32> %B) local_unnamed_addr #2 { ; X86-LABEL: test_mm128_mask_dpbf16ps_128: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] ; X86-NEXT: vdpbf16ps %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x76,0x09,0x52,0xc2] ; X86-NEXT: retl # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll index ada2c8d53aa53a..bff2107c2e4478 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll @@ -111,7 +111,7 @@ define <8 x i64> @test_mm512_mask_set1_epi8(<8 x i64> %__O, i64 %__M, i8 signext ; X86: # %bb.0: # %entry ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kunpckdq %k1, %k0, %k1 ; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} ; X86-NEXT: retl @@ -136,7 +136,7 @@ define <8 x i64> @test_mm512_maskz_set1_epi8(i64 %__M, i8 signext %__A) { ; X86: # %bb.0: # %entry ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k0 ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kunpckdq %k1, %k0, %k1 ; X86-NEXT: vpbroadcastb %eax, %zmm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll index a32b84986e8951..11dd987729d920 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll @@ -104,7 +104,7 @@ entry: define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_test_epi16_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1} ; X86-NEXT: kmovd %k0, %eax @@ -272,7 +272,7 @@ entry: define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_testn_epi16_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1} ; X86-NEXT: kmovd %k0, %eax @@ -343,7 +343,7 @@ entry: define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm_mask_set1_epi8: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vpbroadcastb %eax, %xmm0 {%k1} ; X86-NEXT: retl @@ -366,7 +366,7 @@ entry: define <2 x i64> @test_mm_maskz_set1_epi8(i16 zeroext %__M, i8 signext %__A) { ; X86-LABEL: test_mm_maskz_set1_epi8: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vpbroadcastb %eax, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -388,7 +388,7 @@ entry: define <4 x i64> @test_mm256_mask_set1_epi8(<4 x i64> %__O, i32 %__M, i8 signext %__A){ ; X86-LABEL: test_mm256_mask_set1_epi8: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vpbroadcastb %eax, %ymm0 {%k1} ; X86-NEXT: retl @@ -411,7 +411,7 @@ entry: define <4 x i64> @test_mm256_maskz_set1_epi8(i32 %__M, i8 signext %__A) { ; X86-LABEL: test_mm256_maskz_set1_epi8: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 ; X86-NEXT: vpbroadcastb %eax, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -479,7 +479,7 @@ define <2 x i64> @test_mm_mask_set1_epi16(<2 x i64> %__O, i8 zeroext %__M, i16 s ; X86-LABEL: test_mm_mask_set1_epi16: ; X86: # %bb.0: # %entry ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovd %ecx, %k1 ; X86-NEXT: vpbroadcastw %eax, %xmm0 {%k1} ; X86-NEXT: retl @@ -503,7 +503,7 @@ define <2 x i64> @test_mm_maskz_set1_epi16(i8 zeroext %__M, i16 signext %__A) { ; X86-LABEL: test_mm_maskz_set1_epi16: ; X86: # %bb.0: # %entry ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovd %ecx, %k1 ; X86-NEXT: vpbroadcastw %eax, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -641,7 +641,7 @@ define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) { define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) { ; X86-LABEL: test_mm_mask_broadcastw_epi16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpbroadcastw %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -663,7 +663,7 @@ define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) { ; X86-LABEL: test_mm_maskz_broadcastw_epi16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpbroadcastw %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -809,7 +809,7 @@ entry: define <2 x i64> @test_mm_mask2_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask2_permutex2var_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} ; X86-NEXT: vmovdqa %xmm1, %xmm0 @@ -874,7 +874,7 @@ entry: define <2 x i64> @test_mm_mask_permutex2var_epi16(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_permutex2var_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -898,7 +898,7 @@ entry: define <2 x i64> @test_mm_maskz_permutex2var_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_permutex2var_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll index 4f69c9a676a084..2e4b2e0c5cc91c 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -6174,7 +6174,7 @@ define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) ; X86: # %bb.0: ; X86-NEXT: vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] ; X86-NEXT: retl # encoding: [0xc3] @@ -6283,7 +6283,7 @@ define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2 ; X86: # %bb.0: ; X86-NEXT: vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1] ; X86-NEXT: kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] ; X86-NEXT: retl # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll index d54784c71992b9..6b171848ee2a7b 100644 --- a/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512ifma-intrinsics-fast-isel.ll @@ -17,7 +17,7 @@ entry: define <8 x i64> @test_mm512_mask_madd52hi_epu64(<8 x i64> %__W, i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y) { ; X86-LABEL: test_mm512_mask_madd52hi_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -37,7 +37,7 @@ entry: define <8 x i64> @test_mm512_maskz_madd52hi_epu64(i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y, <8 x i64> %__Z) { ; X86-LABEL: test_mm512_maskz_madd52hi_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52huq %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -67,7 +67,7 @@ entry: define <8 x i64> @test_mm512_mask_madd52lo_epu64(<8 x i64> %__W, i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y) { ; X86-LABEL: test_mm512_mask_madd52lo_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -87,7 +87,7 @@ entry: define <8 x i64> @test_mm512_maskz_madd52lo_epu64(i8 zeroext %__M, <8 x i64> %__X, <8 x i64> %__Y, <8 x i64> %__Z) { ; X86-LABEL: test_mm512_maskz_madd52lo_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52luq %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll index 4ee81405f608db..576980f6953467 100644 --- a/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512ifmavl-intrinsics-fast-isel.ll @@ -17,7 +17,7 @@ entry: define <2 x i64> @test_mm_mask_madd52hi_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) { ; X86-LABEL: test_mm_mask_madd52hi_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -38,7 +38,7 @@ entry: define <2 x i64> @test_mm_maskz_madd52hi_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) { ; X86-LABEL: test_mm_maskz_madd52hi_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52huq %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -69,7 +69,7 @@ entry: define <4 x i64> @test_mm256_mask_madd52hi_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) { ; X86-LABEL: test_mm256_mask_madd52hi_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -90,7 +90,7 @@ entry: define <4 x i64> @test_mm256_maskz_madd52hi_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) { ; X86-LABEL: test_mm256_maskz_madd52hi_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52huq %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -121,7 +121,7 @@ entry: define <2 x i64> @test_mm_mask_madd52lo_epu64(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) { ; X86-LABEL: test_mm_mask_madd52lo_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -142,7 +142,7 @@ entry: define <2 x i64> @test_mm_maskz_madd52lo_epu64(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y, <2 x i64> %__Z) { ; X86-LABEL: test_mm_maskz_madd52lo_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52luq %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -173,7 +173,7 @@ entry: define <4 x i64> @test_mm256_mask_madd52lo_epu64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) { ; X86-LABEL: test_mm256_mask_madd52lo_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -194,7 +194,7 @@ entry: define <4 x i64> @test_mm256_maskz_madd52lo_epu64(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y, <4 x i64> %__Z) { ; X86-LABEL: test_mm256_maskz_madd52lo_epu64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmadd52luq %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll index a05b2a36183ccb..cc528d0acc54d3 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2-intrinsics-fast-isel.ll @@ -303,7 +303,7 @@ entry: define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_shldi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -325,7 +325,7 @@ declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_maskz_shldi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -475,7 +475,7 @@ entry: define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_shrdi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -497,7 +497,7 @@ declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>) define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_maskz_shrdi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -647,7 +647,7 @@ entry: define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_shldv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -667,7 +667,7 @@ entry: define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_maskz_shldv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl @@ -817,7 +817,7 @@ entry: define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_mask_shrdv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} ; X86-NEXT: retl @@ -837,7 +837,7 @@ entry: define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) { ; X86-LABEL: test_mm512_maskz_shrdv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll index 4e69fcdc2fd076..5094180f46a816 100644 --- a/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512vbmi2vl-intrinsics-fast-isel.ll @@ -7,7 +7,7 @@ define <2 x i64> @test_mm_mask_compress_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) { ; X86-LABEL: test_mm_mask_compress_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpcompressw %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -28,7 +28,7 @@ entry: define <2 x i64> @test_mm_maskz_compress_epi16(i8 zeroext %__U, <2 x i64> %__D) { ; X86-LABEL: test_mm_maskz_compress_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpcompressw %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -87,7 +87,7 @@ entry: define void @test_mm_mask_compressstoreu_epi16(ptr %__P, i8 zeroext %__U, <2 x i64> %__D) { ; X86-LABEL: test_mm_mask_compressstoreu_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpcompressw %xmm0, (%ecx) {%k1} @@ -128,7 +128,7 @@ entry: define <2 x i64> @test_mm_mask_expand_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__D) { ; X86-LABEL: test_mm_mask_expand_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpexpandw %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -149,7 +149,7 @@ entry: define <2 x i64> @test_mm_maskz_expand_epi16(i8 zeroext %__U, <2 x i64> %__D) { ; X86-LABEL: test_mm_maskz_expand_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpexpandw %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -209,7 +209,7 @@ define <2 x i64> @test_mm_mask_expandloadu_epi16(<2 x i64> %__S, i8 zeroext %__U ; X86-LABEL: test_mm_mask_expandloadu_epi16: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovd %ecx, %k1 ; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} ; X86-NEXT: retl @@ -231,7 +231,7 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi16(i8 zeroext %__U, ptr readonly ; X86-LABEL: test_mm_maskz_expandloadu_epi16: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovd %ecx, %k1 ; X86-NEXT: vpexpandw (%eax), %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -574,7 +574,7 @@ entry: define <4 x i64> @test_mm256_mask_shldi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shldi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldq $47, %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -597,7 +597,7 @@ declare <4 x i64> @llvm.fshl.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) define <4 x i64> @test_mm256_maskz_shldi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shldi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldq $63, %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -628,7 +628,7 @@ entry: define <2 x i64> @test_mm_mask_shldi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shldi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldq $47, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -651,7 +651,7 @@ declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) define <2 x i64> @test_mm_maskz_shldi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shldi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldq $63, %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -682,7 +682,7 @@ entry: define <4 x i64> @test_mm256_mask_shldi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shldi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldd $7, %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -708,7 +708,7 @@ declare <8 x i32> @llvm.fshl.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) define <4 x i64> @test_mm256_maskz_shldi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shldi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldd $15, %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -744,7 +744,7 @@ entry: define <2 x i64> @test_mm_mask_shldi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shldi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldd $7, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -771,7 +771,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define <2 x i64> @test_mm_maskz_shldi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shldi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldd $15, %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -868,7 +868,7 @@ entry: define <2 x i64> @test_mm_mask_shldi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shldi_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldw $3, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -894,7 +894,7 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) define <2 x i64> @test_mm_maskz_shldi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shldi_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldw $7, %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -930,7 +930,7 @@ entry: define <4 x i64> @test_mm256_mask_shrdi_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shrdi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdq $47, %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -953,7 +953,7 @@ declare <4 x i64> @llvm.fshr.v4i64(<4 x i64>, <4 x i64>, <4 x i64>) define <4 x i64> @test_mm256_maskz_shrdi_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shrdi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdq $63, %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -984,7 +984,7 @@ entry: define <2 x i64> @test_mm_mask_shrdi_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shrdi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdq $47, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1007,7 +1007,7 @@ declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) define <2 x i64> @test_mm_maskz_shrdi_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shrdi_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdq $63, %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1038,7 +1038,7 @@ entry: define <4 x i64> @test_mm256_mask_shrdi_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shrdi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdd $7, %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1064,7 +1064,7 @@ declare <8 x i32> @llvm.fshr.v8i32(<8 x i32>, <8 x i32>, <8 x i32>) define <4 x i64> @test_mm256_maskz_shrdi_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shrdi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdd $15, %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1100,7 +1100,7 @@ entry: define <2 x i64> @test_mm_mask_shrdi_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shrdi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdd $7, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1127,7 +1127,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define <2 x i64> @test_mm_maskz_shrdi_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shrdi_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdd $15, %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1224,7 +1224,7 @@ entry: define <2 x i64> @test_mm_mask_shrdi_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shrdi_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdw $3, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1250,7 +1250,7 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) define <2 x i64> @test_mm_maskz_shrdi_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shrdi_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdw $7, %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1286,7 +1286,7 @@ entry: define <4 x i64> @test_mm256_mask_shldv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shldv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1307,7 +1307,7 @@ entry: define <4 x i64> @test_mm256_maskz_shldv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shldv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1338,7 +1338,7 @@ entry: define <2 x i64> @test_mm_mask_shldv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shldv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1359,7 +1359,7 @@ entry: define <2 x i64> @test_mm_maskz_shldv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shldv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1390,7 +1390,7 @@ entry: define <4 x i64> @test_mm256_mask_shldv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shldv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1414,7 +1414,7 @@ entry: define <4 x i64> @test_mm256_maskz_shldv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shldv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1452,7 +1452,7 @@ entry: define <2 x i64> @test_mm_mask_shldv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shldv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1477,7 +1477,7 @@ entry: define <2 x i64> @test_mm_maskz_shldv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shldv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1576,7 +1576,7 @@ entry: define <2 x i64> @test_mm_mask_shldv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shldv_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1600,7 +1600,7 @@ entry: define <2 x i64> @test_mm_maskz_shldv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shldv_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1638,7 +1638,7 @@ entry: define <4 x i64> @test_mm256_mask_shrdv_epi64(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shrdv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1659,7 +1659,7 @@ entry: define <4 x i64> @test_mm256_maskz_shrdv_epi64(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shrdv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1690,7 +1690,7 @@ entry: define <2 x i64> @test_mm_mask_shrdv_epi64(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shrdv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1711,7 +1711,7 @@ entry: define <2 x i64> @test_mm_maskz_shrdv_epi64(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shrdv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1742,7 +1742,7 @@ entry: define <4 x i64> @test_mm256_mask_shrdv_epi32(<4 x i64> %__S, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shrdv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1766,7 +1766,7 @@ entry: define <4 x i64> @test_mm256_maskz_shrdv_epi32(i8 zeroext %__U, <4 x i64> %__S, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shrdv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1804,7 +1804,7 @@ entry: define <2 x i64> @test_mm_mask_shrdv_epi32(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shrdv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1829,7 +1829,7 @@ entry: define <2 x i64> @test_mm_maskz_shrdv_epi32(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shrdv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1928,7 +1928,7 @@ entry: define <2 x i64> @test_mm_mask_shrdv_epi16(<2 x i64> %__S, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_shrdv_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1952,7 +1952,7 @@ entry: define <2 x i64> @test_mm_maskz_shrdv_epi16(i8 zeroext %__U, <2 x i64> %__S, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_shrdv_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovd %eax, %k1 ; X86-NEXT: vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll index 173e2bad8acebc..b1281338ec56cb 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll @@ -7,7 +7,7 @@ define <4 x float> @test_mm_mask_cvtepi32_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_cvtepi32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtdq2ps %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -29,7 +29,7 @@ entry: define <4 x float> @test_mm_maskz_cvtepi32_ps(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_cvtepi32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtdq2ps %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -51,7 +51,7 @@ entry: define <8 x float> @test_mm256_mask_cvtepi32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_cvtepi32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtdq2ps %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -72,7 +72,7 @@ entry: define <8 x float> @test_mm256_maskz_cvtepi32_ps(i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_cvtepi32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtdq2ps %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -93,7 +93,7 @@ entry: define <2 x i64> @test_mm_mask_cvtpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_cvtpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2dq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -113,7 +113,7 @@ entry: define <2 x i64> @test_mm_maskz_cvtpd_epi32(i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_maskz_cvtpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2dq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -132,7 +132,7 @@ entry: define <2 x i64> @test_mm256_mask_cvtpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_cvtpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2dq %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -157,7 +157,7 @@ entry: define <2 x i64> @test_mm256_maskz_cvtpd_epi32(i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_maskz_cvtpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2dq %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -181,7 +181,7 @@ entry: define <4 x float> @test_mm_mask_cvtpd_ps(<4 x float> %__W, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_cvtpd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2ps %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -199,7 +199,7 @@ entry: define <4 x float> @test_mm_maskz_cvtpd_ps(i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_maskz_cvtpd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2ps %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -217,7 +217,7 @@ entry: define <4 x float> @test_mm256_mask_cvtpd_ps(<4 x float> %__W, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_cvtpd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2ps %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -240,7 +240,7 @@ entry: define <4 x float> @test_mm256_maskz_cvtpd_ps(i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_maskz_cvtpd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -274,7 +274,7 @@ entry: define <2 x i64> @test_mm_mask_cvtpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_cvtpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2udq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -294,7 +294,7 @@ entry: define <2 x i64> @test_mm_maskz_cvtpd_epu32(i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_maskz_cvtpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2udq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -325,7 +325,7 @@ entry: define <2 x i64> @test_mm256_mask_cvtpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_cvtpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2udq %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -347,7 +347,7 @@ entry: define <2 x i64> @test_mm256_maskz_cvtpd_epu32(i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_maskz_cvtpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtpd2udq %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -368,7 +368,7 @@ entry: define <4 x float> @test_mm_mask_cvtph_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_cvtph_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtph2ps %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -392,7 +392,7 @@ entry: define <4 x float> @test_mm_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_cvtph_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtph2ps %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -416,7 +416,7 @@ entry: define <8 x float> @test_mm256_mask_cvtph_ps(<8 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm256_mask_cvtph_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtph2ps %xmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -438,7 +438,7 @@ entry: define <8 x float> @test_mm256_maskz_cvtph_ps(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_cvtph_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtph2ps %xmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -460,7 +460,7 @@ entry: define <2 x i64> @test_mm_mask_cvtps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_cvtps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2dq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -483,7 +483,7 @@ entry: define <2 x i64> @test_mm_maskz_cvtps_epi32(i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_cvtps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2dq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -505,7 +505,7 @@ entry: define <4 x i64> @test_mm256_mask_cvtps_epi32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_cvtps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2dq %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -527,7 +527,7 @@ entry: define <4 x i64> @test_mm256_maskz_cvtps_epi32(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_maskz_cvtps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2dq %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -548,7 +548,7 @@ entry: define <2 x double> @test_mm_mask_cvtps_pd(<2 x double> %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm_mask_cvtps_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2pd %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -570,7 +570,7 @@ entry: define <2 x double> @test_mm_maskz_cvtps_pd(i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm_maskz_cvtps_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2pd %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -592,7 +592,7 @@ entry: define <4 x double> @test_mm256_mask_cvtps_pd(<4 x double> %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm256_mask_cvtps_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2pd %xmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -613,7 +613,7 @@ entry: define <4 x double> @test_mm256_maskz_cvtps_pd(i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm256_maskz_cvtps_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -645,7 +645,7 @@ entry: define <2 x i64> @test_mm_mask_cvtps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_cvtps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2udq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -665,7 +665,7 @@ entry: define <2 x i64> @test_mm_maskz_cvtps_epu32(i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_cvtps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2udq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -695,7 +695,7 @@ entry: define <4 x i64> @test_mm256_mask_cvtps_epu32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_cvtps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2udq %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -715,7 +715,7 @@ entry: define <4 x i64> @test_mm256_maskz_cvtps_epu32(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_maskz_cvtps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtps2udq %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -734,7 +734,7 @@ entry: define <2 x i64> @test_mm_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_cvttpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2dq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -754,7 +754,7 @@ entry: define <2 x i64> @test_mm_maskz_cvttpd_epi32(i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_maskz_cvttpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2dq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -773,7 +773,7 @@ entry: define <2 x i64> @test_mm256_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_cvttpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -798,7 +798,7 @@ entry: define <2 x i64> @test_mm256_maskz_cvttpd_epi32(i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_maskz_cvttpd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -833,7 +833,7 @@ entry: define <2 x i64> @test_mm_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_cvttpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2udq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -853,7 +853,7 @@ entry: define <2 x i64> @test_mm_maskz_cvttpd_epu32(i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_maskz_cvttpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2udq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -884,7 +884,7 @@ entry: define <2 x i64> @test_mm256_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_cvttpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -906,7 +906,7 @@ entry: define <2 x i64> @test_mm256_maskz_cvttpd_epu32(i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_maskz_cvttpd_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -927,7 +927,7 @@ entry: define <2 x i64> @test_mm_mask_cvttps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_cvttps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -950,7 +950,7 @@ entry: define <2 x i64> @test_mm_maskz_cvttps_epi32(i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_cvttps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -972,7 +972,7 @@ entry: define <4 x i64> @test_mm256_mask_cvttps_epi32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_cvttps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -994,7 +994,7 @@ entry: define <4 x i64> @test_mm256_maskz_cvttps_epi32(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_maskz_cvttps_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1026,7 +1026,7 @@ entry: define <2 x i64> @test_mm_mask_cvttps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_cvttps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1046,7 +1046,7 @@ entry: define <2 x i64> @test_mm_maskz_cvttps_epu32(i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_cvttps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1076,7 +1076,7 @@ entry: define <4 x i64> @test_mm256_mask_cvttps_epu32(<4 x i64> %__W, i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_cvttps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1096,7 +1096,7 @@ entry: define <4 x i64> @test_mm256_maskz_cvttps_epu32(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_maskz_cvttps_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1127,7 +1127,7 @@ entry: define <2 x double> @test_mm_mask_cvtepu32_pd(<2 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm_mask_cvtepu32_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2pd %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1150,7 +1150,7 @@ entry: define <2 x double> @test_mm_maskz_cvtepu32_pd(i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm_maskz_cvtepu32_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1184,7 +1184,7 @@ entry: define <4 x double> @test_mm256_mask_cvtepu32_pd(<4 x double> %__W, i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm256_mask_cvtepu32_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2pd %xmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1206,7 +1206,7 @@ entry: define <4 x double> @test_mm256_maskz_cvtepu32_pd(i8 zeroext %__U, <2 x i64> %__A) local_unnamed_addr #0 { ; X86-LABEL: test_mm256_maskz_cvtepu32_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2pd %xmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1239,7 +1239,7 @@ entry: define <4 x float> @test_mm_mask_cvtepu32_ps(<4 x float> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_cvtepu32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2ps %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -1261,7 +1261,7 @@ entry: define <4 x float> @test_mm_maskz_cvtepu32_ps(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_cvtepu32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2ps %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1294,7 +1294,7 @@ entry: define <8 x float> @test_mm256_mask_cvtepu32_ps(<8 x float> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_cvtepu32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2ps %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -1315,7 +1315,7 @@ entry: define <8 x float> @test_mm256_maskz_cvtepu32_ps(i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_cvtepu32_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcvtudq2ps %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1346,7 +1346,7 @@ entry: define <8 x float> @test_mm256_mask_shuffle_f32x4(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { ; X86-LABEL: test_mm256_mask_shuffle_f32x4: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} = ymm1[4,5,6,7],ymm2[4,5,6,7] ; X86-NEXT: retl @@ -1366,7 +1366,7 @@ entry: define <8 x float> @test_mm256_maskz_shuffle_f32x4(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { ; X86-LABEL: test_mm256_maskz_shuffle_f32x4: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] ; X86-NEXT: retl @@ -1396,7 +1396,7 @@ entry: define <4 x double> @test_mm256_mask_shuffle_f64x2(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { ; X86-LABEL: test_mm256_mask_shuffle_f64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} = ymm1[2,3],ymm2[2,3] ; X86-NEXT: retl @@ -1417,7 +1417,7 @@ entry: define <4 x double> @test_mm256_maskz_shuffle_f64x2(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { ; X86-LABEL: test_mm256_maskz_shuffle_f64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] ; X86-NEXT: retl @@ -1448,7 +1448,7 @@ entry: define <4 x i64> @test_mm256_mask_shuffle_i32x4(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shuffle_i32x4: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} = ymm1[4,5,6,7],ymm2[4,5,6,7] ; X86-NEXT: retl @@ -1471,7 +1471,7 @@ entry: define <4 x i64> @test_mm256_maskz_shuffle_i32x4(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shuffle_i32x4: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] ; X86-NEXT: retl @@ -1503,7 +1503,7 @@ entry: define <4 x i64> @test_mm256_mask_shuffle_i64x2(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_shuffle_i64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} = ymm1[2,3],ymm2[2,3] ; X86-NEXT: retl @@ -1524,7 +1524,7 @@ entry: define <4 x i64> @test_mm256_maskz_shuffle_i64x2(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_shuffle_i64x2: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] ; X86-NEXT: retl @@ -1561,7 +1561,7 @@ entry: define zeroext i8 @test_mm_mask_test_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_test_epi32_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1606,7 +1606,7 @@ entry: define zeroext i8 @test_mm256_mask_test_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_test_epi32_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1650,7 +1650,7 @@ entry: define zeroext i8 @test_mm_mask_test_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_test_epi64_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1694,7 +1694,7 @@ entry: define zeroext i8 @test_mm256_mask_test_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_test_epi64_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1740,7 +1740,7 @@ entry: define zeroext i8 @test_mm_mask_testn_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_testn_epi32_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1785,7 +1785,7 @@ entry: define zeroext i8 @test_mm256_mask_testn_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_testn_epi32_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1829,7 +1829,7 @@ entry: define zeroext i8 @test_mm_mask_testn_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_testn_epi64_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1873,7 +1873,7 @@ entry: define zeroext i8 @test_mm256_mask_testn_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_testn_epi64_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1} ; X86-NEXT: kmovw %k0, %eax @@ -1903,7 +1903,7 @@ entry: define <2 x i64> @test_mm_mask_set1_epi32(<2 x i64> %__O, i8 zeroext %__M) { ; X86-LABEL: test_mm_mask_set1_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1} ; X86-NEXT: retl @@ -1925,7 +1925,7 @@ entry: define <2 x i64> @test_mm_maskz_set1_epi32(i8 zeroext %__M) { ; X86-LABEL: test_mm_maskz_set1_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -1946,7 +1946,7 @@ entry: define <4 x i64> @test_mm256_mask_set1_epi32(<4 x i64> %__O, i8 zeroext %__M) { ; X86-LABEL: test_mm256_mask_set1_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 {%k1} ; X86-NEXT: retl @@ -1967,7 +1967,7 @@ entry: define <4 x i64> @test_mm256_maskz_set1_epi32(i8 zeroext %__M) { ; X86-LABEL: test_mm256_maskz_set1_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -1987,7 +1987,7 @@ entry: define <2 x i64> @test_mm_mask_set1_epi64(<2 x i64> %__O, i8 zeroext %__M, i64 %__A) { ; X86-LABEL: test_mm_mask_set1_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 ; X86-NEXT: kmovw %eax, %k1 @@ -2011,7 +2011,7 @@ entry: define <2 x i64> @test_mm_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) { ; X86-LABEL: test_mm_maskz_set1_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-NEXT: kmovw %eax, %k1 @@ -2036,7 +2036,7 @@ entry: define <4 x i64> @test_mm256_mask_set1_epi64(<4 x i64> %__O, i8 zeroext %__M, i64 %__A) { ; X86-LABEL: test_mm256_mask_set1_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 ; X86-NEXT: kmovw %eax, %k1 @@ -2060,7 +2060,7 @@ entry: define <4 x i64> @test_mm256_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) { ; X86-LABEL: test_mm256_maskz_set1_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; X86-NEXT: kmovw %eax, %k1 @@ -2095,7 +2095,7 @@ define <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) { define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_broadcastd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2119,7 +2119,7 @@ entry: define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 zeroext %__M, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_broadcastd_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2153,7 +2153,7 @@ define <4 x i64> @test_mm256_broadcastd_epi32(<2 x i64> %a0) { define <4 x i64> @test_mm256_mask_broadcastd_epi32(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) { ; X86-LABEL: test_mm256_mask_broadcastd_epi32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd %xmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -2175,7 +2175,7 @@ define <4 x i64> @test_mm256_mask_broadcastd_epi32(<4 x i64> %a0, i8 %a1, <2 x i define <4 x i64> @test_mm256_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) { ; X86-LABEL: test_mm256_maskz_broadcastd_epi32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -2205,7 +2205,7 @@ define <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) { define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_broadcastq_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2226,7 +2226,7 @@ entry: define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 zeroext %__M, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_broadcastq_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2256,7 +2256,7 @@ define <4 x i64> @test_mm256_broadcastq_epi64(<2 x i64> %a0) { define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %__O, i8 zeroext %__M, <2 x i64> %__A) { ; X86-LABEL: test_mm256_mask_broadcastq_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -2277,7 +2277,7 @@ entry: define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 zeroext %__M, <2 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_broadcastq_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -2307,7 +2307,7 @@ define <4 x double> @test_mm256_broadcastsd_pd(<2 x double> %a0) { define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %__O, i8 zeroext %__M, <2 x double> %__A) { ; X86-LABEL: test_mm256_mask_broadcastsd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -2328,7 +2328,7 @@ entry: define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 zeroext %__M, <2 x double> %__A) { ; X86-LABEL: test_mm256_maskz_broadcastsd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -2358,7 +2358,7 @@ define <4 x float> @test_mm_broadcastss_ps(<4 x float> %a0) { define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %__O, i8 zeroext %__M, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_broadcastss_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -2379,7 +2379,7 @@ entry: define <4 x float> @test_mm_maskz_broadcastss_ps(i8 zeroext %__M, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_broadcastss_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -2409,7 +2409,7 @@ define <8 x float> @test_mm256_broadcastss_ps(<4 x float> %a0) { define <8 x float> @test_mm256_mask_broadcastss_ps(<8 x float> %a0, i8 %a1, <4 x float> %a2) { ; X86-LABEL: test_mm256_mask_broadcastss_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastss %xmm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -2428,7 +2428,7 @@ define <8 x float> @test_mm256_mask_broadcastss_ps(<8 x float> %a0, i8 %a1, <4 x define <8 x float> @test_mm256_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) { ; X86-LABEL: test_mm256_maskz_broadcastss_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -2456,7 +2456,7 @@ define <2 x double> @test_mm_movddup_pd(<2 x double> %a0) { define <2 x double> @test_mm_mask_movedup_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_movedup_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] ; X86-NEXT: retl @@ -2477,7 +2477,7 @@ entry: define <2 x double> @test_mm_maskz_movedup_pd(i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_maskz_movedup_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0] ; X86-NEXT: retl @@ -2507,7 +2507,7 @@ define <4 x double> @test_mm256_movddup_pd(<4 x double> %a0) { define <4 x double> @test_mm256_mask_movedup_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_movedup_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2] ; X86-NEXT: retl @@ -2528,7 +2528,7 @@ entry: define <4 x double> @test_mm256_maskz_movedup_pd(i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_maskz_movedup_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2] ; X86-NEXT: retl @@ -2558,7 +2558,7 @@ define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) { define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_movehdup_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3] ; X86-NEXT: retl @@ -2579,7 +2579,7 @@ entry: define <4 x float> @test_mm_maskz_movehdup_ps(i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_movehdup_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3] ; X86-NEXT: retl @@ -2609,7 +2609,7 @@ define <8 x float> @test_mm256_movehdup_ps(<8 x float> %a0) { define <8 x float> @test_mm256_mask_movehdup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) { ; X86-LABEL: test_mm256_mask_movehdup_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = ymm1[1,1,3,3,5,5,7,7] ; X86-NEXT: retl @@ -2628,7 +2628,7 @@ define <8 x float> @test_mm256_mask_movehdup_ps(<8 x float> %a0, i8 %a1, <8 x fl define <8 x float> @test_mm256_maskz_movehdup_ps(i8 %a0, <8 x float> %a1) { ; X86-LABEL: test_mm256_maskz_movehdup_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7] ; X86-NEXT: retl @@ -2656,7 +2656,7 @@ define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) { define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_moveldup_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2] ; X86-NEXT: retl @@ -2677,7 +2677,7 @@ entry: define <4 x float> @test_mm_maskz_moveldup_ps(i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_moveldup_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2] ; X86-NEXT: retl @@ -2707,7 +2707,7 @@ define <8 x float> @test_mm256_moveldup_ps(<8 x float> %a0) { define <8 x float> @test_mm256_mask_moveldup_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2) { ; X86-LABEL: test_mm256_mask_moveldup_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2,4,4,6,6] ; X86-NEXT: retl @@ -2726,7 +2726,7 @@ define <8 x float> @test_mm256_mask_moveldup_ps(<8 x float> %a0, i8 %a1, <8 x fl define <8 x float> @test_mm256_maskz_moveldup_ps(i8 %a0, <8 x float> %a1) { ; X86-LABEL: test_mm256_maskz_moveldup_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6] ; X86-NEXT: retl @@ -2754,7 +2754,7 @@ define <4 x i64> @test_mm256_permutex_epi64(<4 x i64> %a0) { define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X) { ; X86-LABEL: test_mm256_mask_permutex_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[3,0,0,0] ; X86-NEXT: retl @@ -2775,7 +2775,7 @@ entry: define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 zeroext %__M, <4 x i64> %__X) { ; X86-LABEL: test_mm256_maskz_permutex_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] ; X86-NEXT: retl @@ -2805,7 +2805,7 @@ define <4 x double> @test_mm256_permutex_pd(<4 x double> %a0) { define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__X) { ; X86-LABEL: test_mm256_mask_permutex_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0] ; X86-NEXT: retl @@ -2826,7 +2826,7 @@ entry: define <4 x double> @test_mm256_maskz_permutex_pd(i8 zeroext %__U, <4 x double> %__X) { ; X86-LABEL: test_mm256_maskz_permutex_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0] ; X86-NEXT: retl @@ -2856,7 +2856,7 @@ define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_shuffle_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1] ; X86-NEXT: retl @@ -2877,7 +2877,7 @@ entry: define <2 x double> @test_mm_maskz_shuffle_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_shuffle_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] ; X86-NEXT: retl @@ -2907,7 +2907,7 @@ define <4 x double> @test_mm256_shuffle_pd(<4 x double> %a0, <4 x double> %a1) { define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { ; X86-LABEL: test_mm256_mask_shuffle_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2] ; X86-NEXT: retl @@ -2928,7 +2928,7 @@ entry: define <4 x double> @test_mm256_maskz_shuffle_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { ; X86-LABEL: test_mm256_maskz_shuffle_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2] ; X86-NEXT: retl @@ -2958,7 +2958,7 @@ define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) { define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_shuffle_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0] ; X86-NEXT: retl @@ -2979,7 +2979,7 @@ entry: define <4 x float> @test_mm_maskz_shuffle_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_shuffle_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0] ; X86-NEXT: retl @@ -3009,7 +3009,7 @@ define <8 x float> @test_mm256_shuffle_ps(<8 x float> %a0, <8 x float> %a1) { define <8 x float> @test_mm256_mask_shuffle_ps(<8 x float> %a0, i8 %a1, <8 x float> %a2, <8 x float> %a3) { ; X86-LABEL: test_mm256_mask_shuffle_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufps {{.*#+}} ymm0 {%k1} = ymm1[0,1],ymm2[0,0],ymm1[4,5],ymm2[4,4] ; X86-NEXT: retl @@ -3028,7 +3028,7 @@ define <8 x float> @test_mm256_mask_shuffle_ps(<8 x float> %a0, i8 %a1, <8 x flo define <8 x float> @test_mm256_maskz_shuffle_ps(i8 %a0, <8 x float> %a1, <8 x float> %a2) { ; X86-LABEL: test_mm256_maskz_shuffle_ps: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1],ymm1[0,0],ymm0[4,5],ymm1[4,4] ; X86-NEXT: retl @@ -3047,7 +3047,7 @@ define <8 x float> @test_mm256_maskz_shuffle_ps(i8 %a0, <8 x float> %a1, <8 x fl define <4 x i64> @test_mm256_mask_mul_epi32(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind { ; X86-LABEL: test_mm256_mask_mul_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuldq %ymm1, %ymm2, %ymm0 {%k1} ; X86-NEXT: retl @@ -3072,7 +3072,7 @@ entry: define <4 x i64> @test_mm256_maskz_mul_epi32(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind { ; X86-LABEL: test_mm256_maskz_mul_epi32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuldq %ymm0, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -3096,7 +3096,7 @@ define <4 x i64> @test_mm256_maskz_mul_epi32(i8 zeroext %__M, <4 x i64> %__X, <4 define <2 x i64> @test_mm_mask_mul_epi32(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind { ; X86-LABEL: test_mm_mask_mul_epi32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuldq %xmm1, %xmm2, %xmm0 {%k1} ; X86-NEXT: retl @@ -3120,7 +3120,7 @@ define <2 x i64> @test_mm_mask_mul_epi32(<2 x i64> %__W, i8 zeroext %__M, <2 x i define <2 x i64> @test_mm_maskz_mul_epi32(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind { ; X86-LABEL: test_mm_maskz_mul_epi32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuldq %xmm0, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -3144,7 +3144,7 @@ define <2 x i64> @test_mm_maskz_mul_epi32(i8 zeroext %__M, <2 x i64> %__X, <2 x define <4 x i64> @test_mm256_mask_mul_epu32(<4 x i64> %__W, i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind { ; X86-LABEL: test_mm256_mask_mul_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuludq %ymm1, %ymm2, %ymm0 {%k1} ; X86-NEXT: retl @@ -3167,7 +3167,7 @@ entry: define <4 x i64> @test_mm256_maskz_mul_epu32(i8 zeroext %__M, <4 x i64> %__X, <4 x i64> %__Y) nounwind { ; X86-LABEL: test_mm256_maskz_mul_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuludq %ymm0, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -3190,7 +3190,7 @@ entry: define <2 x i64> @test_mm_mask_mul_epu32(<2 x i64> %__W, i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind { ; X86-LABEL: test_mm_mask_mul_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuludq %xmm1, %xmm2, %xmm0 {%k1} ; X86-NEXT: retl @@ -3213,7 +3213,7 @@ entry: define <2 x i64> @test_mm_maskz_mul_epu32(i8 zeroext %__M, <2 x i64> %__X, <2 x i64> %__Y) nounwind { ; X86-LABEL: test_mm_maskz_mul_epu32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmuludq %xmm0, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -3311,7 +3311,7 @@ entry: define <2 x i64> @test_mm256_mask_cvtepi32_epi16(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_cvtepi32_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovdw %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -3334,7 +3334,7 @@ entry: define <2 x i64> @test_mm256_maskz_cvtepi32_epi16(i8 zeroext %__M, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_cvtepi32_epi16: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovdw %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -3368,7 +3368,7 @@ entry: define <2 x i64> @test_mm256_mask_cvtepi64_epi32(<2 x i64> %__O, i8 zeroext %__M, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_cvtepi64_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovqd %ymm1, %xmm0 {%k1} ; X86-NEXT: vzeroupper @@ -3393,7 +3393,7 @@ entry: define <2 x i64> @test_mm256_maskz_cvtepi64_epi32(i8 zeroext %__M, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_cvtepi64_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpmovqd %ymm0, %xmm0 {%k1} {z} ; X86-NEXT: vzeroupper @@ -3473,7 +3473,7 @@ declare <4 x i32> @llvm.x86.avx512.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32 define <2 x i64> @test_mm_mask_ternarylogic_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) { ; X86-LABEL: test_mm_mask_ternarylogic_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -3498,7 +3498,7 @@ entry: define <2 x i64> @test_mm_maskz_ternarylogic_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) { ; X86-LABEL: test_mm_maskz_ternarylogic_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogd $4, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -3539,7 +3539,7 @@ declare <8 x i32> @llvm.x86.avx512.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32 define <4 x i64> @test_mm256_mask_ternarylogic_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) { ; X86-LABEL: test_mm256_mask_ternarylogic_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -3563,7 +3563,7 @@ entry: define <4 x i64> @test_mm256_maskz_ternarylogic_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) { ; X86-LABEL: test_mm256_maskz_ternarylogic_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogd $4, %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -3599,7 +3599,7 @@ declare <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64 define <2 x i64> @test_mm_mask_ternarylogic_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__B, <2 x i64> %__C) { ; X86-LABEL: test_mm_mask_ternarylogic_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -3620,7 +3620,7 @@ entry: define <2 x i64> @test_mm_maskz_ternarylogic_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B, <2 x i64> %__C) { ; X86-LABEL: test_mm_maskz_ternarylogic_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogq $4, %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -3653,7 +3653,7 @@ declare <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64 define <4 x i64> @test_mm256_mask_ternarylogic_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__B, <4 x i64> %__C) { ; X86-LABEL: test_mm256_mask_ternarylogic_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -3674,7 +3674,7 @@ entry: define <4 x i64> @test_mm256_maskz_ternarylogic_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B, <4 x i64> %__C) { ; X86-LABEL: test_mm256_maskz_ternarylogic_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpternlogq $4, %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -3695,7 +3695,7 @@ entry: define <2 x i64> @test_mm_mask2_permutex2var_epi32(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask2_permutex2var_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2d %xmm2, %xmm0, %xmm1 {%k1} ; X86-NEXT: vmovdqa %xmm1, %xmm0 @@ -3722,7 +3722,7 @@ entry: define <4 x i64> @test_mm256_mask2_permutex2var_epi32(<4 x i64> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask2_permutex2var_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2d %ymm2, %ymm0, %ymm1 {%k1} ; X86-NEXT: vmovdqa %ymm1, %ymm0 @@ -3748,7 +3748,7 @@ entry: define <2 x double> @test_mm_mask2_permutex2var_pd(<2 x double> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x double> %__B) { ; X86-LABEL: test_mm_mask2_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} ; X86-NEXT: vmovapd %xmm1, %xmm0 @@ -3772,7 +3772,7 @@ entry: define <4 x double> @test_mm256_mask2_permutex2var_pd(<4 x double> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x double> %__B) { ; X86-LABEL: test_mm256_mask2_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} ; X86-NEXT: vmovapd %ymm1, %ymm0 @@ -3796,7 +3796,7 @@ entry: define <4 x float> @test_mm_mask2_permutex2var_ps(<4 x float> %__A, <2 x i64> %__I, i8 zeroext %__U, <4 x float> %__B) { ; X86-LABEL: test_mm_mask2_permutex2var_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ; X86-NEXT: vmovaps %xmm1, %xmm0 @@ -3821,7 +3821,7 @@ entry: define <8 x float> @test_mm256_mask2_permutex2var_ps(<8 x float> %__A, <4 x i64> %__I, i8 zeroext %__U, <8 x float> %__B) { ; X86-LABEL: test_mm256_mask2_permutex2var_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} ; X86-NEXT: vmovaps %ymm1, %ymm0 @@ -3845,7 +3845,7 @@ entry: define <2 x i64> @test_mm_mask2_permutex2var_epi64(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask2_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2q %xmm2, %xmm0, %xmm1 {%k1} ; X86-NEXT: vmovdqa %xmm1, %xmm0 @@ -3868,7 +3868,7 @@ entry: define <4 x i64> @test_mm256_mask2_permutex2var_epi64(<4 x i64> %__A, <4 x i64> %__I, i8 zeroext %__U, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask2_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermi2q %ymm2, %ymm0, %ymm1 {%k1} ; X86-NEXT: vmovdqa %ymm1, %ymm0 @@ -3905,7 +3905,7 @@ entry: define <2 x i64> @test_mm_mask_permutex2var_epi32(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_permutex2var_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2d %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -3930,7 +3930,7 @@ entry: define <2 x i64> @test_mm_maskz_permutex2var_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_permutex2var_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2d %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -3969,7 +3969,7 @@ entry: define <4 x i64> @test_mm256_mask_permutex2var_epi32(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_permutex2var_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2d %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -3993,7 +3993,7 @@ entry: define <4 x i64> @test_mm256_maskz_permutex2var_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_permutex2var_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2d %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -4027,7 +4027,7 @@ entry: define <2 x double> @test_mm_mask_permutex2var_pd(<2 x double> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x double> %__B) { ; X86-LABEL: test_mm_mask_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2pd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -4048,7 +4048,7 @@ entry: define <2 x double> @test_mm_maskz_permutex2var_pd(i8 zeroext %__U, <2 x double> %__A, <2 x i64> %__I, <2 x double> %__B) { ; X86-LABEL: test_mm_maskz_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2pd %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -4079,7 +4079,7 @@ entry: define <4 x double> @test_mm256_mask_permutex2var_pd(<4 x double> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x double> %__B) { ; X86-LABEL: test_mm256_mask_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2pd %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -4100,7 +4100,7 @@ entry: define <4 x double> @test_mm256_maskz_permutex2var_pd(i8 zeroext %__U, <4 x double> %__A, <4 x i64> %__I, <4 x double> %__B) { ; X86-LABEL: test_mm256_maskz_permutex2var_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2pd %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -4132,7 +4132,7 @@ entry: define <4 x float> @test_mm_mask_permutex2var_ps(<4 x float> %__A, i8 zeroext %__U, <2 x i64> %__I, <4 x float> %__B) { ; X86-LABEL: test_mm_mask_permutex2var_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2ps %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -4154,7 +4154,7 @@ entry: define <4 x float> @test_mm_maskz_permutex2var_ps(i8 zeroext %__U, <4 x float> %__A, <2 x i64> %__I, <4 x float> %__B) { ; X86-LABEL: test_mm_maskz_permutex2var_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2ps %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -4187,7 +4187,7 @@ entry: define <8 x float> @test_mm256_mask_permutex2var_ps(<8 x float> %__A, i8 zeroext %__U, <4 x i64> %__I, <8 x float> %__B) { ; X86-LABEL: test_mm256_mask_permutex2var_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2ps %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -4208,7 +4208,7 @@ entry: define <8 x float> @test_mm256_maskz_permutex2var_ps(i8 zeroext %__U, <8 x float> %__A, <4 x i64> %__I, <8 x float> %__B) { ; X86-LABEL: test_mm256_maskz_permutex2var_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2ps %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -4239,7 +4239,7 @@ entry: define <2 x i64> @test_mm_mask_permutex2var_epi64(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2q %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -4260,7 +4260,7 @@ entry: define <2 x i64> @test_mm_maskz_permutex2var_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2q %xmm2, %xmm1, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -4291,7 +4291,7 @@ entry: define <4 x i64> @test_mm256_mask_permutex2var_epi64(<4 x i64> %__A, i8 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2q %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -4312,7 +4312,7 @@ entry: define <4 x i64> @test_mm256_maskz_permutex2var_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_permutex2var_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpermt2q %ymm2, %ymm1, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -4334,7 +4334,7 @@ entry: define <2 x double> @test_mm_mask_fmadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_mask_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) + xmm2 ; X86-NEXT: retl @@ -4355,7 +4355,7 @@ entry: define <2 x double> @test_mm_mask_fmsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_mask_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) - xmm2 ; X86-NEXT: retl @@ -4377,7 +4377,7 @@ entry: define <2 x double> @test_mm_mask3_fmadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -4400,7 +4400,7 @@ entry: define <2 x double> @test_mm_mask3_fnmadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231pd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -4424,7 +4424,7 @@ entry: define <2 x double> @test_mm_maskz_fmadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -4445,7 +4445,7 @@ entry: define <2 x double> @test_mm_maskz_fmsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -4467,7 +4467,7 @@ entry: define <2 x double> @test_mm_maskz_fnmadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213pd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -4489,7 +4489,7 @@ entry: define <2 x double> @test_mm_maskz_fnmsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213pd {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -4512,7 +4512,7 @@ entry: define <4 x double> @test_mm256_mask_fmadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_mask_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) + ymm2 ; X86-NEXT: retl @@ -4533,7 +4533,7 @@ entry: define <4 x double> @test_mm256_mask_fmsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_mask_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) - ymm2 ; X86-NEXT: retl @@ -4555,7 +4555,7 @@ entry: define <4 x double> @test_mm256_mask3_fmadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) + ymm2 ; X86-NEXT: vmovapd %ymm2, %ymm0 @@ -4578,7 +4578,7 @@ entry: define <4 x double> @test_mm256_mask3_fnmadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231pd {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) + ymm2 ; X86-NEXT: vmovapd %ymm2, %ymm0 @@ -4602,7 +4602,7 @@ entry: define <4 x double> @test_mm256_maskz_fmadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_maskz_fmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2 ; X86-NEXT: retl @@ -4623,7 +4623,7 @@ entry: define <4 x double> @test_mm256_maskz_fmsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_maskz_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) - ymm2 ; X86-NEXT: retl @@ -4645,7 +4645,7 @@ entry: define <4 x double> @test_mm256_maskz_fnmadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_maskz_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213pd {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) + ymm2 ; X86-NEXT: retl @@ -4667,7 +4667,7 @@ entry: define <4 x double> @test_mm256_maskz_fnmsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_maskz_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213pd {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) - ymm2 ; X86-NEXT: retl @@ -4690,7 +4690,7 @@ entry: define <4 x float> @test_mm_mask_fmadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_mask_fmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) + xmm2 ; X86-NEXT: retl @@ -4711,7 +4711,7 @@ entry: define <4 x float> @test_mm_mask_fmsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_mask_fmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) - xmm2 ; X86-NEXT: retl @@ -4733,7 +4733,7 @@ entry: define <4 x float> @test_mm_mask3_fmadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -4756,7 +4756,7 @@ entry: define <4 x float> @test_mm_mask3_fnmadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231ps {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) + xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -4780,7 +4780,7 @@ entry: define <4 x float> @test_mm_maskz_fmadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -4801,7 +4801,7 @@ entry: define <4 x float> @test_mm_maskz_fmsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -4823,7 +4823,7 @@ entry: define <4 x float> @test_mm_maskz_fnmadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213ps {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) + xmm2 ; X86-NEXT: retl @@ -4845,7 +4845,7 @@ entry: define <4 x float> @test_mm_maskz_fnmsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fnmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213ps {{.*#+}} xmm0 {%k1} {z} = -(xmm1 * xmm0) - xmm2 ; X86-NEXT: retl @@ -4868,7 +4868,7 @@ entry: define <8 x float> @test_mm256_mask_fmadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_mask_fmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) + ymm2 ; X86-NEXT: retl @@ -4888,7 +4888,7 @@ entry: define <8 x float> @test_mm256_mask_fmsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_mask_fmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) - ymm2 ; X86-NEXT: retl @@ -4909,7 +4909,7 @@ entry: define <8 x float> @test_mm256_mask3_fmadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) + ymm2 ; X86-NEXT: vmovaps %ymm2, %ymm0 @@ -4931,7 +4931,7 @@ entry: define <8 x float> @test_mm256_mask3_fnmadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fnmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd231ps {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) + ymm2 ; X86-NEXT: vmovaps %ymm2, %ymm0 @@ -4954,7 +4954,7 @@ entry: define <8 x float> @test_mm256_maskz_fmadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_maskz_fmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmadd213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) + ymm2 ; X86-NEXT: retl @@ -4974,7 +4974,7 @@ entry: define <8 x float> @test_mm256_maskz_fmsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_maskz_fmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) - ymm2 ; X86-NEXT: retl @@ -4995,7 +4995,7 @@ entry: define <8 x float> @test_mm256_maskz_fnmadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_maskz_fnmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd213ps {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) + ymm2 ; X86-NEXT: retl @@ -5016,7 +5016,7 @@ entry: define <8 x float> @test_mm256_maskz_fnmsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_maskz_fnmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub213ps {{.*#+}} ymm0 {%k1} {z} = -(ymm1 * ymm0) - ymm2 ; X86-NEXT: retl @@ -5038,7 +5038,7 @@ entry: define <2 x double> @test_mm_mask_fmaddsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_mask_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2 ; X86-NEXT: retl @@ -5062,7 +5062,7 @@ entry: define <2 x double> @test_mm_mask_fmsubadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_mask_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd132pd {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) -/+ xmm2 ; X86-NEXT: retl @@ -5086,7 +5086,7 @@ entry: define <2 x double> @test_mm_mask3_fmaddsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5112,7 +5112,7 @@ entry: define <2 x double> @test_mm_maskz_fmaddsub_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2 ; X86-NEXT: retl @@ -5136,7 +5136,7 @@ entry: define <2 x double> @test_mm_maskz_fmsubadd_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_maskz_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd213pd {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) -/+ xmm2 ; X86-NEXT: retl @@ -5160,7 +5160,7 @@ entry: define <4 x double> @test_mm256_mask_fmaddsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_mask_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2 ; X86-NEXT: retl @@ -5184,7 +5184,7 @@ entry: define <4 x double> @test_mm256_mask_fmsubadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_mask_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd132pd {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) -/+ ymm2 ; X86-NEXT: retl @@ -5208,7 +5208,7 @@ entry: define <4 x double> @test_mm256_mask3_fmaddsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2 ; X86-NEXT: vmovapd %ymm2, %ymm0 @@ -5234,7 +5234,7 @@ entry: define <4 x double> @test_mm256_maskz_fmaddsub_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_maskz_fmaddsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2 ; X86-NEXT: retl @@ -5258,7 +5258,7 @@ entry: define <4 x double> @test_mm256_maskz_fmsubadd_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_maskz_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd213pd {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) -/+ ymm2 ; X86-NEXT: retl @@ -5282,7 +5282,7 @@ entry: define <4 x float> @test_mm_mask_fmaddsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_mask_fmaddsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) +/- xmm2 ; X86-NEXT: retl @@ -5306,7 +5306,7 @@ entry: define <4 x float> @test_mm_mask_fmsubadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_mask_fmsubadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd132ps {{.*#+}} xmm0 {%k1} = (xmm0 * xmm1) -/+ xmm2 ; X86-NEXT: retl @@ -5330,7 +5330,7 @@ entry: define <4 x float> @test_mm_mask3_fmaddsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmaddsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) +/- xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5356,7 +5356,7 @@ entry: define <4 x float> @test_mm_maskz_fmaddsub_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmaddsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) +/- xmm2 ; X86-NEXT: retl @@ -5380,7 +5380,7 @@ entry: define <4 x float> @test_mm_maskz_fmsubadd_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_maskz_fmsubadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd213ps {{.*#+}} xmm0 {%k1} {z} = (xmm1 * xmm0) -/+ xmm2 ; X86-NEXT: retl @@ -5404,7 +5404,7 @@ entry: define <8 x float> @test_mm256_mask_fmaddsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_mask_fmaddsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) +/- ymm2 ; X86-NEXT: retl @@ -5427,7 +5427,7 @@ entry: define <8 x float> @test_mm256_mask_fmsubadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_mask_fmsubadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd132ps {{.*#+}} ymm0 {%k1} = (ymm0 * ymm1) -/+ ymm2 ; X86-NEXT: retl @@ -5450,7 +5450,7 @@ entry: define <8 x float> @test_mm256_mask3_fmaddsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmaddsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) +/- ymm2 ; X86-NEXT: vmovaps %ymm2, %ymm0 @@ -5475,7 +5475,7 @@ entry: define <8 x float> @test_mm256_maskz_fmaddsub_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_maskz_fmaddsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmaddsub213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) +/- ymm2 ; X86-NEXT: retl @@ -5498,7 +5498,7 @@ entry: define <8 x float> @test_mm256_maskz_fmsubadd_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_maskz_fmsubadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd213ps {{.*#+}} ymm0 {%k1} {z} = (ymm1 * ymm0) -/+ ymm2 ; X86-NEXT: retl @@ -5521,7 +5521,7 @@ entry: define <2 x double> @test_mm_mask3_fmsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5545,7 +5545,7 @@ entry: define <4 x double> @test_mm256_mask3_fmsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) - ymm2 ; X86-NEXT: vmovapd %ymm2, %ymm0 @@ -5569,7 +5569,7 @@ entry: define <4 x float> @test_mm_mask3_fmsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5593,7 +5593,7 @@ entry: define <8 x float> @test_mm256_mask3_fmsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsub231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) - ymm2 ; X86-NEXT: vmovaps %ymm2, %ymm0 @@ -5616,7 +5616,7 @@ entry: define <2 x double> @test_mm_mask3_fmsubadd_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd231pd {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5642,7 +5642,7 @@ entry: define <4 x double> @test_mm256_mask3_fmsubadd_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmsubadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd231pd {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2 ; X86-NEXT: vmovapd %ymm2, %ymm0 @@ -5668,7 +5668,7 @@ entry: define <4 x float> @test_mm_mask3_fmsubadd_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fmsubadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd231ps {{.*#+}} xmm2 {%k1} = (xmm0 * xmm1) -/+ xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5694,7 +5694,7 @@ entry: define <8 x float> @test_mm256_mask3_fmsubadd_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fmsubadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfmsubadd231ps {{.*#+}} ymm2 {%k1} = (ymm0 * ymm1) -/+ ymm2 ; X86-NEXT: vmovaps %ymm2, %ymm0 @@ -5719,7 +5719,7 @@ entry: define <2 x double> @test_mm_mask_fnmadd_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_mask_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd132pd {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) + xmm2 ; X86-NEXT: retl @@ -5741,7 +5741,7 @@ entry: define <4 x double> @test_mm256_mask_fnmadd_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_mask_fnmadd_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd132pd {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) + ymm2 ; X86-NEXT: retl @@ -5763,7 +5763,7 @@ entry: define <4 x float> @test_mm_mask_fnmadd_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_mask_fnmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd132ps {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) + xmm2 ; X86-NEXT: retl @@ -5785,7 +5785,7 @@ entry: define <8 x float> @test_mm256_mask_fnmadd_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_mask_fnmadd_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmadd132ps {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) + ymm2 ; X86-NEXT: retl @@ -5806,7 +5806,7 @@ entry: define <2 x double> @test_mm_mask_fnmsub_pd(<2 x double> %__A, i8 zeroext %__U, <2 x double> %__B, <2 x double> %__C) { ; X86-LABEL: test_mm_mask_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub132pd {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: retl @@ -5829,7 +5829,7 @@ entry: define <2 x double> @test_mm_mask3_fnmsub_pd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231pd {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovapd %xmm2, %xmm0 @@ -5854,7 +5854,7 @@ entry: define <4 x double> @test_mm256_mask_fnmsub_pd(<4 x double> %__A, i8 zeroext %__U, <4 x double> %__B, <4 x double> %__C) { ; X86-LABEL: test_mm256_mask_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub132pd {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) - ymm2 ; X86-NEXT: retl @@ -5877,7 +5877,7 @@ entry: define <4 x double> @test_mm256_mask3_fnmsub_pd(<4 x double> %__A, <4 x double> %__B, <4 x double> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fnmsub_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231pd {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) - ymm2 ; X86-NEXT: vmovapd %ymm2, %ymm0 @@ -5902,7 +5902,7 @@ entry: define <4 x float> @test_mm_mask_fnmsub_ps(<4 x float> %__A, i8 zeroext %__U, <4 x float> %__B, <4 x float> %__C) { ; X86-LABEL: test_mm_mask_fnmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub132ps {{.*#+}} xmm0 {%k1} = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: retl @@ -5925,7 +5925,7 @@ entry: define <4 x float> @test_mm_mask3_fnmsub_ps(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm_mask3_fnmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231ps {{.*#+}} xmm2 {%k1} = -(xmm0 * xmm1) - xmm2 ; X86-NEXT: vmovaps %xmm2, %xmm0 @@ -5950,7 +5950,7 @@ entry: define <8 x float> @test_mm256_mask_fnmsub_ps(<8 x float> %__A, i8 zeroext %__U, <8 x float> %__B, <8 x float> %__C) { ; X86-LABEL: test_mm256_mask_fnmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub132ps {{.*#+}} ymm0 {%k1} = -(ymm0 * ymm1) - ymm2 ; X86-NEXT: retl @@ -5972,7 +5972,7 @@ entry: define <8 x float> @test_mm256_mask3_fnmsub_ps(<8 x float> %__A, <8 x float> %__B, <8 x float> %__C, i8 zeroext %__U) { ; X86-LABEL: test_mm256_mask3_fnmsub_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vfnmsub231ps {{.*#+}} ymm2 {%k1} = -(ymm0 * ymm1) - ymm2 ; X86-NEXT: vmovaps %ymm2, %ymm0 @@ -5997,7 +5997,7 @@ define <2 x double> @test_mm_mask_expandloadu_pd(<2 x double> %__W, i8 zeroext % ; X86-LABEL: test_mm_mask_expandloadu_pd: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} ; X86-NEXT: retl @@ -6018,7 +6018,7 @@ define <2 x double> @test_mm_maskz_expandloadu_pd(i8 zeroext %__U, ptr readonly ; X86-LABEL: test_mm_maskz_expandloadu_pd: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandpd (%eax), %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6039,7 +6039,7 @@ define <4 x double> @test_mm256_mask_expandloadu_pd(<4 x double> %__W, i8 zeroex ; X86-LABEL: test_mm256_mask_expandloadu_pd: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} ; X86-NEXT: retl @@ -6060,7 +6060,7 @@ define <4 x double> @test_mm256_maskz_expandloadu_pd(i8 zeroext %__U, ptr readon ; X86-LABEL: test_mm256_maskz_expandloadu_pd: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandpd (%eax), %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6081,7 +6081,7 @@ define <2 x i64> @test_mm_mask_expandloadu_epi64(<2 x i64> %__W, i8 zeroext %__U ; X86-LABEL: test_mm_mask_expandloadu_epi64: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} ; X86-NEXT: retl @@ -6102,7 +6102,7 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi64(i8 zeroext %__U, ptr readonly ; X86-LABEL: test_mm_maskz_expandloadu_epi64: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandq (%eax), %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6123,7 +6123,7 @@ define <4 x i64> @test_mm256_mask_expandloadu_epi64(<4 x i64> %__W, i8 zeroext % ; X86-LABEL: test_mm256_mask_expandloadu_epi64: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} ; X86-NEXT: retl @@ -6144,7 +6144,7 @@ define <4 x i64> @test_mm256_maskz_expandloadu_epi64(i8 zeroext %__U, ptr readon ; X86-LABEL: test_mm256_maskz_expandloadu_epi64: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandq (%eax), %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6165,7 +6165,7 @@ define <4 x float> @test_mm_mask_expandloadu_ps(<4 x float> %__W, i8 zeroext %__ ; X86-LABEL: test_mm_mask_expandloadu_ps: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} ; X86-NEXT: retl @@ -6186,7 +6186,7 @@ define <4 x float> @test_mm_maskz_expandloadu_ps(i8 zeroext %__U, ptr readonly % ; X86-LABEL: test_mm_maskz_expandloadu_ps: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandps (%eax), %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6207,7 +6207,7 @@ define <8 x float> @test_mm256_mask_expandloadu_ps(<8 x float> %__W, i8 zeroext ; X86-LABEL: test_mm256_mask_expandloadu_ps: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} ; X86-NEXT: retl @@ -6227,7 +6227,7 @@ define <8 x float> @test_mm256_maskz_expandloadu_ps(i8 zeroext %__U, ptr readonl ; X86-LABEL: test_mm256_maskz_expandloadu_ps: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vexpandps (%eax), %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6247,7 +6247,7 @@ define <2 x i64> @test_mm_mask_expandloadu_epi32(<2 x i64> %__W, i8 zeroext %__U ; X86-LABEL: test_mm_mask_expandloadu_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} ; X86-NEXT: retl @@ -6270,7 +6270,7 @@ define <2 x i64> @test_mm_maskz_expandloadu_epi32(i8 zeroext %__U, ptr readonly ; X86-LABEL: test_mm_maskz_expandloadu_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandd (%eax), %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6292,7 +6292,7 @@ define <4 x i64> @test_mm256_mask_expandloadu_epi32(<4 x i64> %__W, i8 zeroext % ; X86-LABEL: test_mm256_mask_expandloadu_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} ; X86-NEXT: retl @@ -6314,7 +6314,7 @@ define <4 x i64> @test_mm256_maskz_expandloadu_epi32(i8 zeroext %__U, ptr readon ; X86-LABEL: test_mm256_maskz_expandloadu_epi32: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: kmovw %ecx, %k1 ; X86-NEXT: vpexpandd (%eax), %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6334,7 +6334,7 @@ entry: define void @test_mm_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_compressstoreu_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcompresspd %xmm0, (%ecx) {%k1} @@ -6355,7 +6355,7 @@ entry: define void @test_mm256_mask_compressstoreu_pd(ptr %__P, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_compressstoreu_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcompresspd %ymm0, (%ecx) {%k1} @@ -6378,7 +6378,7 @@ entry: define void @test_mm_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_compressstoreu_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpcompressq %xmm0, (%ecx) {%k1} @@ -6399,7 +6399,7 @@ entry: define void @test_mm256_mask_compressstoreu_epi64(ptr %__P, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_compressstoreu_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpcompressq %ymm0, (%ecx) {%k1} @@ -6422,7 +6422,7 @@ entry: define void @test_mm_mask_compressstoreu_ps(ptr %__P, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_compressstoreu_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcompressps %xmm0, (%ecx) {%k1} @@ -6443,7 +6443,7 @@ entry: define void @test_mm256_mask_compressstoreu_ps(ptr %__P, i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_compressstoreu_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vcompressps %ymm0, (%ecx) {%k1} @@ -6465,7 +6465,7 @@ entry: define void @test_mm_mask_compressstoreu_epi32(ptr %__P, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_compressstoreu_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpcompressd %xmm0, (%ecx) {%k1} @@ -6487,7 +6487,7 @@ entry: define void @test_mm256_mask_compressstoreu_epi32(ptr %__P, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_compressstoreu_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vpcompressd %ymm0, (%ecx) {%k1} @@ -6516,7 +6516,7 @@ declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>) #8 define <2 x double> @test_mm_mask_sqrt_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_mask_sqrt_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -6539,7 +6539,7 @@ declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) define <2 x double> @test_mm_maskz_sqrt_pd(i8 zeroext %__U, <2 x double> %__A) { ; X86-LABEL: test_mm_maskz_sqrt_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6560,7 +6560,7 @@ entry: define <4 x double> @test_mm256_mask_sqrt_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_mask_sqrt_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -6583,7 +6583,7 @@ declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) define <4 x double> @test_mm256_maskz_sqrt_pd(i8 zeroext %__U, <4 x double> %__A) { ; X86-LABEL: test_mm256_maskz_sqrt_pd: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtpd %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6604,7 +6604,7 @@ entry: define <4 x float> @test_mm_mask_sqrt_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_mask_sqrt_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtps %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -6627,7 +6627,7 @@ declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) define <4 x float> @test_mm_maskz_sqrt_ps(i8 zeroext %__U, <4 x float> %__A) { ; X86-LABEL: test_mm_maskz_sqrt_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtps %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6648,7 +6648,7 @@ entry: define <8 x float> @test_mm256_mask_sqrt_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_sqrt_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtps %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -6668,7 +6668,7 @@ entry: define <8 x float> @test_mm256_maskz_sqrt_ps(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_maskz_sqrt_ps: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vsqrtps %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6702,7 +6702,7 @@ entry: define <2 x i64> @test_mm_mask_rol_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_rol_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprold $5, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -6726,7 +6726,7 @@ entry: define <2 x i64> @test_mm_maskz_rol_epi32(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_rol_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprold $5, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6761,7 +6761,7 @@ entry: define <4 x i64> @test_mm256_mask_rol_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_rol_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprold $5, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -6784,7 +6784,7 @@ entry: define <4 x i64> @test_mm256_maskz_rol_epi32(i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_rol_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprold $5, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6816,7 +6816,7 @@ entry: define <2 x i64> @test_mm_mask_rol_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_rol_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolq $5, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -6837,7 +6837,7 @@ entry: define <2 x i64> @test_mm_maskz_rol_epi64(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_rol_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolq $5, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6868,7 +6868,7 @@ entry: define <4 x i64> @test_mm256_mask_rol_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_rol_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolq $5, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -6889,7 +6889,7 @@ entry: define <4 x i64> @test_mm256_maskz_rol_epi64(i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_rol_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolq $5, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -6923,7 +6923,7 @@ entry: define <2 x i64> @test_mm_mask_rolv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_rolv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -6948,7 +6948,7 @@ entry: define <2 x i64> @test_mm_maskz_rolv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_rolv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -6985,7 +6985,7 @@ entry: define <4 x i64> @test_mm256_mask_rolv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_rolv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvd %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -7009,7 +7009,7 @@ entry: define <4 x i64> @test_mm256_maskz_rolv_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_rolv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -7042,7 +7042,7 @@ entry: define <2 x i64> @test_mm_mask_rolv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_rolv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvq %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -7063,7 +7063,7 @@ entry: define <2 x i64> @test_mm_maskz_rolv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_rolv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -7094,7 +7094,7 @@ entry: define <4 x i64> @test_mm256_mask_rolv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_rolv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvq %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -7115,7 +7115,7 @@ entry: define <4 x i64> @test_mm256_maskz_rolv_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_rolv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -7148,7 +7148,7 @@ entry: define <2 x i64> @test_mm_mask_ror_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_ror_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprord $5, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -7172,7 +7172,7 @@ entry: define <2 x i64> @test_mm_maskz_ror_epi32(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_ror_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprord $5, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -7207,7 +7207,7 @@ entry: define <4 x i64> @test_mm256_mask_ror_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_ror_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprord $5, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -7230,7 +7230,7 @@ entry: define <4 x i64> @test_mm256_maskz_ror_epi32(i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_ror_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprord $5, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -7262,7 +7262,7 @@ entry: define <2 x i64> @test_mm_mask_ror_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_mask_ror_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorq $5, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -7283,7 +7283,7 @@ entry: define <2 x i64> @test_mm_maskz_ror_epi64(i8 zeroext %__U, <2 x i64> %__A) { ; X86-LABEL: test_mm_maskz_ror_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorq $5, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -7314,7 +7314,7 @@ entry: define <4 x i64> @test_mm256_mask_ror_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_mask_ror_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorq $5, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -7335,7 +7335,7 @@ entry: define <4 x i64> @test_mm256_maskz_ror_epi64(i8 zeroext %__U, <4 x i64> %__A) { ; X86-LABEL: test_mm256_maskz_ror_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorq $5, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -7369,7 +7369,7 @@ entry: define <2 x i64> @test_mm_mask_rorv_epi32(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_rorv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvd %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -7394,7 +7394,7 @@ entry: define <2 x i64> @test_mm_maskz_rorv_epi32(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_rorv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -7431,7 +7431,7 @@ entry: define <4 x i64> @test_mm256_mask_rorv_epi32(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_rorv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvd %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -7455,7 +7455,7 @@ entry: define <4 x i64> @test_mm256_maskz_rorv_epi32(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_rorv_epi32: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl @@ -7488,7 +7488,7 @@ entry: define <2 x i64> @test_mm_mask_rorv_epi64(<2 x i64> %__W, i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_mask_rorv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvq %xmm2, %xmm1, %xmm0 {%k1} ; X86-NEXT: retl @@ -7509,7 +7509,7 @@ entry: define <2 x i64> @test_mm_maskz_rorv_epi64(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) { ; X86-LABEL: test_mm_maskz_rorv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} ; X86-NEXT: retl @@ -7540,7 +7540,7 @@ entry: define <4 x i64> @test_mm256_mask_rorv_epi64(<4 x i64> %__W, i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_mask_rorv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvq %ymm2, %ymm1, %ymm0 {%k1} ; X86-NEXT: retl @@ -7561,7 +7561,7 @@ entry: define <4 x i64> @test_mm256_maskz_rorv_epi64(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) { ; X86-LABEL: test_mm256_maskz_rorv_epi64: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: kmovw %eax, %k1 ; X86-NEXT: vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll index e7586e4f631584..3707c4861fe985 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll @@ -10753,7 +10753,7 @@ define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) ; X86: # %bb.0: ; X86-NEXT: vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1] ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -10876,7 +10876,7 @@ define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2 ; X86: # %bb.0: ; X86-NEXT: vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1] ; X86-NEXT: kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] ; X86-NEXT: andb %cl, %al # encoding: [0x20,0xc8] ; X86-NEXT: addb %cl, %al # encoding: [0x00,0xc8] ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] diff --git a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll index 8ce36dd3c8ef5d..a47dbe570fd52d 100644 --- a/llvm/test/CodeGen/X86/bitcast-vector-bool.ll +++ b/llvm/test/CodeGen/X86/bitcast-vector-bool.ll @@ -119,7 +119,7 @@ define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind { ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax ; SSE2-SSSE3-NEXT: movd %eax, %xmm0 ; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: retq ; @@ -211,7 +211,7 @@ define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind { ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax ; SSE2-SSSE3-NEXT: movd %eax, %xmm0 ; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: retq ; @@ -384,7 +384,7 @@ define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind { ; SSE2-SSSE3-NEXT: pmovmskb %xmm0, %eax ; SSE2-SSSE3-NEXT: movd %eax, %xmm0 ; SSE2-SSSE3-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: addb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll index 04aff9a727c220..67b185daef7eb6 100644 --- a/llvm/test/CodeGen/X86/bitreverse.ll +++ b/llvm/test/CodeGen/X86/bitreverse.ll @@ -348,7 +348,7 @@ declare i8 @llvm.bitreverse.i8(i8) readnone define i8 @test_bitreverse_i8(i8 %a) { ; X86-LABEL: test_bitreverse_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rolb $4, %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andb $51, %cl @@ -397,7 +397,7 @@ declare i4 @llvm.bitreverse.i4(i4) readnone define i4 @test_bitreverse_i4(i4 %a) { ; X86-LABEL: test_bitreverse_i4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andb $15, %al ; X86-NEXT: movl %ecx, %edx @@ -528,7 +528,7 @@ define i4 @fold_i4() { define i8 @identity_i8(i8 %a) { ; X86-LABEL: identity_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; ; X64-LABEL: identity_i8: @@ -539,7 +539,7 @@ define i8 @identity_i8(i8 %a) { ; ; X86XOP-LABEL: identity_i8: ; X86XOP: # %bb.0: -; X86XOP-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86XOP-NEXT: movb {{[0-9]+}}(%esp), %al ; X86XOP-NEXT: retl %b = call i8 @llvm.bitreverse.i8(i8 %a) %c = call i8 @llvm.bitreverse.i8(i8 %b) diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll index 15c8ad471ea7f4..17be1300be386c 100644 --- a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -291,7 +291,7 @@ define i1 @andn_cmp_swap_ops(i64 %x, i64 %y) { define i1 @andn_cmp_i8(i8 %x, i8 %y) { ; X86-LABEL: andn_cmp_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: notb %al ; X86-NEXT: testb %al, {{[0-9]+}}(%esp) ; X86-NEXT: sete %al diff --git a/llvm/test/CodeGen/X86/bool-math.ll b/llvm/test/CodeGen/X86/bool-math.ll index e5919bc1cec429..c0a7a5bd4fbd59 100644 --- a/llvm/test/CodeGen/X86/bool-math.ll +++ b/llvm/test/CodeGen/X86/bool-math.ll @@ -55,7 +55,7 @@ define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) { ; ; X32-LABEL: sub_zext_cmp_mask_narrower_result: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: orb $46, %al ; X32-NEXT: retl @@ -77,7 +77,7 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { ; ; X32-LABEL: add_zext_cmp_mask_same_size_result: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: xorb $27, %al ; X32-NEXT: retl @@ -120,7 +120,7 @@ define i8 @add_zext_cmp_mask_narrower_result(i32 %x) { ; ; X32-LABEL: add_zext_cmp_mask_narrower_result: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: xorb $43, %al ; X32-NEXT: retl @@ -205,7 +205,7 @@ define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) { ; ; X32-LABEL: low_bit_select_constants_bigger_true_same_size_result: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: xorb $-29, %al ; X32-NEXT: retl @@ -246,7 +246,7 @@ define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) { ; ; X32-LABEL: low_bit_select_constants_bigger_true_narrower_result: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: xorb $41, %al ; X32-NEXT: retl diff --git a/llvm/test/CodeGen/X86/bool-vector.ll b/llvm/test/CodeGen/X86/bool-vector.ll index 2cc7fa6ba864fb..abac07032d83de 100644 --- a/llvm/test/CodeGen/X86/bool-vector.ll +++ b/llvm/test/CodeGen/X86/bool-vector.ll @@ -9,9 +9,9 @@ define i32 @PR15215_bad(<4 x i32> %input) { ; X86-LABEL: PR15215_bad: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb {{[0-9]+}}(%esp), %ah ; X86-NEXT: addb %ah, %ah ; X86-NEXT: andb $1, %cl diff --git a/llvm/test/CodeGen/X86/brcond.ll b/llvm/test/CodeGen/X86/brcond.ll index c933b69730806a..c2a580a37c8dbe 100644 --- a/llvm/test/CodeGen/X86/brcond.ll +++ b/llvm/test/CodeGen/X86/brcond.ll @@ -6,7 +6,7 @@ define i32 @test1(i32 %a, i32 %b) nounwind ssp { ; CHECK-LABEL: test1: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK-NEXT: xorb {{[0-9]+}}(%esp), %al ; CHECK-NEXT: testb $64, %al ; CHECK-NEXT: je LBB0_1 diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll index 3a792abda12305..dc9d2bcce79e14 100644 --- a/llvm/test/CodeGen/X86/bt.ll +++ b/llvm/test/CodeGen/X86/bt.ll @@ -1148,7 +1148,7 @@ define void @demanded_i32(ptr nocapture readonly, ptr nocapture, i32) nounwind { define zeroext i1 @demanded_with_known_zeroes(i32 %bit, i32 %bits) { ; X86-LABEL: demanded_with_known_zeroes: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll index efd9d1105d975f..b77b4ed274e87d 100644 --- a/llvm/test/CodeGen/X86/btc_bts_btr.ll +++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll @@ -17,7 +17,7 @@ define i16 @btr_16(i16 %x, i16 %n) { ; X86-LABEL: btr_16: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btrw %cx, %ax ; X86-NEXT: retl %1 = shl i16 1, %n @@ -36,7 +36,7 @@ define i16 @bts_16(i16 %x, i16 %n) { ; ; X86-LABEL: bts_16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: shll %cl, %eax ; X86-NEXT: orw {{[0-9]+}}(%esp), %ax @@ -57,7 +57,7 @@ define i16 @btc_16(i16 %x, i16 %n) { ; ; X86-LABEL: btc_16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: shll %cl, %eax ; X86-NEXT: xorw {{[0-9]+}}(%esp), %ax @@ -78,7 +78,7 @@ define i32 @btr_32(i32 %x, i32 %n) { ; X86-LABEL: btr_32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btrl %ecx, %eax ; X86-NEXT: retl %1 = shl i32 1, %n @@ -97,7 +97,7 @@ define i32 @bts_32(i32 %x, i32 %n) { ; X86-LABEL: bts_32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btsl %ecx, %eax ; X86-NEXT: retl %1 = shl i32 1, %n @@ -115,7 +115,7 @@ define i32 @btc_32(i32 %x, i32 %n) { ; X86-LABEL: btc_32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btcl %ecx, %eax ; X86-NEXT: retl %1 = shl i32 1, %n @@ -132,7 +132,7 @@ define i64 @btr_64(i64 %x, i64 %n) { ; ; X86-LABEL: btr_64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -163,7 +163,7 @@ define i64 @bts_64(i64 %x, i64 %n) { ; ; X86-LABEL: bts_64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -191,7 +191,7 @@ define i64 @btc_64(i64 %x, i64 %n) { ; ; X86-LABEL: btc_64: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -224,7 +224,7 @@ define i16 @btr_16_mask(i16 %x, i16 %n) { ; X86-LABEL: btr_16_mask: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btrw %cx, %ax ; X86-NEXT: retl %1 = and i16 %n, 15 @@ -245,7 +245,7 @@ define i16 @bts_16_mask(i16 %x, i16 %n) { ; ; X86-LABEL: bts_16_mask: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: shll %cl, %eax @@ -269,7 +269,7 @@ define i16 @btc_16_mask(i16 %x, i16 %n) { ; ; X86-LABEL: btc_16_mask: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: shll %cl, %eax @@ -292,7 +292,7 @@ define i32 @btr_32_mask(i32 %x, i32 %n) { ; X86-LABEL: btr_32_mask: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btrl %ecx, %eax ; X86-NEXT: retl %1 = and i32 %n, 31 @@ -312,7 +312,7 @@ define i32 @bts_32_mask(i32 %x, i32 %n) { ; X86-LABEL: bts_32_mask: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btsl %ecx, %eax ; X86-NEXT: retl %1 = and i32 %n, 31 @@ -331,7 +331,7 @@ define i32 @btc_32_mask(i32 %x, i32 %n) { ; X86-LABEL: btc_32_mask: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: btcl %ecx, %eax ; X86-NEXT: retl %1 = and i32 %n, 31 @@ -349,7 +349,7 @@ define i64 @btr_64_mask(i64 %x, i64 %n) { ; ; X86-LABEL: btr_64_mask: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -381,7 +381,7 @@ define i64 @bts_64_mask(i64 %x, i64 %n) { ; ; X86-LABEL: bts_64_mask: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -410,7 +410,7 @@ define i64 @btc_64_mask(i64 %x, i64 %n) { ; ; X86-LABEL: btc_64_mask: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -441,7 +441,7 @@ define i16 @btr_16_load(ptr %x, i16 %n) { ; ; X86-LABEL: btr_16_load: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzwl (%eax), %eax ; X86-NEXT: btrw %cx, %ax @@ -467,7 +467,7 @@ define i16 @bts_16_load(ptr %x, i16 %n) { ; X86-LABEL: bts_16_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: shll %cl, %eax ; X86-NEXT: orw (%edx), %ax @@ -493,7 +493,7 @@ define i16 @btc_16_load(ptr %x, i16 %n) { ; X86-LABEL: btc_16_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: shll %cl, %eax ; X86-NEXT: xorw (%edx), %ax @@ -514,7 +514,7 @@ define i32 @btr_32_load(ptr %x, i32 %n) { ; ; X86-LABEL: btr_32_load: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %eax ; X86-NEXT: btrl %ecx, %eax @@ -535,7 +535,7 @@ define i32 @bts_32_load(ptr %x, i32 %n) { ; ; X86-LABEL: bts_32_load: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %eax ; X86-NEXT: btsl %ecx, %eax @@ -555,7 +555,7 @@ define i32 @btc_32_load(ptr %x, i32 %n) { ; ; X86-LABEL: btc_32_load: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %eax ; X86-NEXT: btcl %ecx, %eax @@ -579,7 +579,7 @@ define i64 @btr_64_load(ptr %x, i64 %n) { ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -617,7 +617,7 @@ define i64 @bts_64_load(ptr %x, i64 %n) { ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -652,7 +652,7 @@ define i64 @btc_64_load(ptr %x, i64 %n) { ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: shldl %cl, %eax, %edx @@ -691,7 +691,7 @@ define void @btr_16_dont_fold(ptr %x, i16 %n) { ; X86-LABEL: btr_16_dont_fold: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movw $-2, %dx ; X86-NEXT: rolw %cl, %dx ; X86-NEXT: andw %dx, (%eax) @@ -717,7 +717,7 @@ define void @bts_16_dont_fold(ptr %x, i16 %n) { ; X86-LABEL: bts_16_dont_fold: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %edx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: orw %dx, (%eax) @@ -742,7 +742,7 @@ define void @btc_16_dont_fold(ptr %x, i16 %n) { ; X86-LABEL: btc_16_dont_fold: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %edx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: xorw %dx, (%eax) @@ -767,7 +767,7 @@ define void @btr_32_dont_fold(ptr %x, i32 %n) { ; X86-LABEL: btr_32_dont_fold: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $-2, %edx ; X86-NEXT: roll %cl, %edx ; X86-NEXT: andl %edx, (%eax) @@ -793,7 +793,7 @@ define void @bts_32_dont_fold(ptr %x, i32 %n) { ; X86-LABEL: bts_32_dont_fold: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %edx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: orl %edx, (%eax) @@ -818,7 +818,7 @@ define void @btc_32_dont_fold(ptr %x, i32 %n) { ; X86-LABEL: btc_32_dont_fold: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %edx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: xorl %edx, (%eax) @@ -846,7 +846,7 @@ define void @btr_64_dont_fold(ptr %x, i64 %n) { ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %edx ; X86-NEXT: xorl %esi, %esi ; X86-NEXT: shldl %cl, %edx, %esi @@ -888,7 +888,7 @@ define void @bts_64_dont_fold(ptr %x, i64 %n) { ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %edx ; X86-NEXT: xorl %esi, %esi ; X86-NEXT: shldl %cl, %edx, %esi @@ -927,7 +927,7 @@ define void @btc_64_dont_fold(ptr %x, i64 %n) { ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %edx ; X86-NEXT: xorl %esi, %esi ; X86-NEXT: shldl %cl, %edx, %esi @@ -960,7 +960,7 @@ define i32 @btr_32_mask_zeros(i32 %x, i32 %n) { ; ; X86-LABEL: btr_32_mask_zeros: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $2, %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: btrl %ecx, %eax @@ -983,7 +983,7 @@ define i32 @bts_32_mask_zeros(i32 %x, i32 %n) { ; ; X86-LABEL: bts_32_mask_zeros: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $2, %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: btsl %ecx, %eax @@ -1005,7 +1005,7 @@ define i32 @btc_32_mask_zeros(i32 %x, i32 %n) { ; ; X86-LABEL: btc_32_mask_zeros: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $2, %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: btcl %ecx, %eax diff --git a/llvm/test/CodeGen/X86/byval5.ll b/llvm/test/CodeGen/X86/byval5.ll index 28deafcd982f53..2b929c596246b1 100644 --- a/llvm/test/CodeGen/X86/byval5.ll +++ b/llvm/test/CodeGen/X86/byval5.ll @@ -38,14 +38,14 @@ define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3, i8 signext %a ; X64-NEXT: movq %rsp, %rdi ; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: rep;movsq (%rsi), %es:(%rdi) -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movb {{[0-9]+}}(%rsp), %al ; X64-NEXT: movb %al, {{[0-9]+}}(%rsp) ; X64-NEXT: callq f@PLT ; X64-NEXT: movl $16, %ecx ; X64-NEXT: movq %rsp, %rdi ; X64-NEXT: movq %rbx, %rsi ; X64-NEXT: rep;movsq (%rsi), %es:(%rdi) -; X64-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movb {{[0-9]+}}(%rsp), %al ; X64-NEXT: movb %al, {{[0-9]+}}(%rsp) ; X64-NEXT: callq f@PLT ; X64-NEXT: addq $272, %rsp # imm = 0x110 @@ -61,9 +61,9 @@ define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3, i8 signext %a ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $272, %esp # imm = 0x110 -; X86-NEXT: movzbl 28(%ebp), %eax -; X86-NEXT: movzbl 24(%ebp), %ecx -; X86-NEXT: movzbl 20(%ebp), %edx +; X86-NEXT: movb 28(%ebp), %al +; X86-NEXT: movb 24(%ebp), %cl +; X86-NEXT: movb 20(%ebp), %dl ; X86-NEXT: movb 16(%ebp), %ah ; X86-NEXT: movb 12(%ebp), %ch ; X86-NEXT: movb 8(%ebp), %dh @@ -78,14 +78,14 @@ define void @g(i8 signext %a1, i8 signext %a2, i8 signext %a3, i8 signext %a ; X86-NEXT: movl %esp, %edi ; X86-NEXT: movl %ebx, %esi ; X86-NEXT: rep;movsl (%esi), %es:(%edi) -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movb %al, {{[0-9]+}}(%esp) ; X86-NEXT: calll f@PLT ; X86-NEXT: movl $32, %ecx ; X86-NEXT: movl %esp, %edi ; X86-NEXT: movl %ebx, %esi ; X86-NEXT: rep;movsl (%esi), %es:(%edi) -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movb %al, {{[0-9]+}}(%esp) ; X86-NEXT: calll f@PLT ; X86-NEXT: leal -12(%ebp), %esp diff --git a/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll b/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll index dea3d04eb74d57..17c95cf0cf6fd3 100644 --- a/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll +++ b/llvm/test/CodeGen/X86/callbr-asm-instr-scheduling.ll @@ -26,7 +26,7 @@ define i64 @early_ioremap_pmd(i64 %addr) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movabsq $9223372036854771712, %rdx # imm = 0x7FFFFFFFFFFFF000 ; CHECK-NEXT: andq %rax, %rdx -; CHECK-NEXT: movzbl pgdir_shift(%rip), %eax +; CHECK-NEXT: movb pgdir_shift(%rip), %al ; CHECK-NEXT: movq page_offset_base(%rip), %rcx ; CHECK-NEXT: shrxq %rax, %rdi, %rax ; CHECK-NEXT: addq %rcx, %rdx diff --git a/llvm/test/CodeGen/X86/clear-highbits.ll b/llvm/test/CodeGen/X86/clear-highbits.ll index 755b1094234fd8..798cd394633459 100644 --- a/llvm/test/CodeGen/X86/clear-highbits.ll +++ b/llvm/test/CodeGen/X86/clear-highbits.ll @@ -24,8 +24,8 @@ define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind { ; X86-LABEL: clear_highbits8_c0: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: retl @@ -47,9 +47,9 @@ define i8 @clear_highbits8_c0(i8 %val, i8 %numhighbits) nounwind { define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind { ; X86-LABEL: clear_highbits8_c2_load: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: retl @@ -57,7 +57,7 @@ define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind { ; X64-LABEL: clear_highbits8_c2_load: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: movb (%rdi), %al ; X64-NEXT: shlb %cl, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shrb %cl, %al @@ -71,8 +71,8 @@ define i8 @clear_highbits8_c2_load(ptr %w, i8 %numhighbits) nounwind { define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind { ; X86-LABEL: clear_highbits8_c4_commutative: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: retl @@ -98,7 +98,7 @@ define i8 @clear_highbits8_c4_commutative(i8 %val, i8 %numhighbits) nounwind { define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits16_c0: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: movzwl %ax, %eax @@ -108,7 +108,7 @@ define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind { ; ; X86-BMI2-LABEL: clear_highbits16_c0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movzwl %cx, %ecx ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax @@ -140,7 +140,7 @@ define i16 @clear_highbits16_c0(i16 %val, i16 %numhighbits) nounwind { define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits16_c1_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: movzwl %ax, %eax @@ -150,7 +150,7 @@ define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind { ; ; X86-BMI2-LABEL: clear_highbits16_c1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movzwl %cx, %ecx ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax @@ -183,7 +183,7 @@ define i16 @clear_highbits16_c1_indexzext(i16 %val, i8 %numhighbits) nounwind { define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits16_c2_load: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzwl (%eax), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -194,7 +194,7 @@ define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind { ; ; X86-BMI2-LABEL: clear_highbits16_c2_load: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movzwl (%ecx), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx @@ -231,7 +231,7 @@ define i16 @clear_highbits16_c2_load(ptr %w, i16 %numhighbits) nounwind { define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits16_c3_load_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzwl (%eax), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -242,7 +242,7 @@ define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind ; ; X86-BMI2-LABEL: clear_highbits16_c3_load_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movzwl (%ecx), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %ecx @@ -280,7 +280,7 @@ define i16 @clear_highbits16_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits16_c4_commutative: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: movzwl %ax, %eax @@ -290,7 +290,7 @@ define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind ; ; X86-BMI2-LABEL: clear_highbits16_c4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movzwl %cx, %ecx ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax @@ -326,7 +326,7 @@ define i16 @clear_highbits16_c4_commutative(i16 %val, i16 %numhighbits) nounwind define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits32_c0: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax @@ -334,7 +334,7 @@ define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind { ; ; X86-BMI2-LABEL: clear_highbits32_c0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $32, %ecx ; X86-BMI2-NEXT: subl %eax, %ecx ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax @@ -363,7 +363,7 @@ define i32 @clear_highbits32_c0(i32 %val, i32 %numhighbits) nounwind { define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits32_c1_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax @@ -371,7 +371,7 @@ define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind { ; ; X86-BMI2-LABEL: clear_highbits32_c1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $32, %ecx ; X86-BMI2-NEXT: subl %eax, %ecx ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax @@ -401,7 +401,7 @@ define i32 @clear_highbits32_c1_indexzext(i32 %val, i8 %numhighbits) nounwind { define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits32_c2_load: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl (%eax), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -411,7 +411,7 @@ define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind { ; X86-BMI2-LABEL: clear_highbits32_c2_load: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $32, %edx ; X86-BMI2-NEXT: subl %ecx, %edx ; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax @@ -441,7 +441,7 @@ define i32 @clear_highbits32_c2_load(ptr %w, i32 %numhighbits) nounwind { define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits32_c3_load_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl (%eax), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -451,7 +451,7 @@ define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind ; X86-BMI2-LABEL: clear_highbits32_c3_load_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $32, %edx ; X86-BMI2-NEXT: subl %ecx, %edx ; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax @@ -482,7 +482,7 @@ define i32 @clear_highbits32_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind { ; X86-NOBMI2-LABEL: clear_highbits32_c4_commutative: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax @@ -490,7 +490,7 @@ define i32 @clear_highbits32_c4_commutative(i32 %val, i32 %numhighbits) nounwind ; ; X86-BMI2-LABEL: clear_highbits32_c4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $32, %ecx ; X86-BMI2-NEXT: subl %eax, %ecx ; X86-BMI2-NEXT: bzhil %ecx, {{[0-9]+}}(%esp), %eax @@ -524,7 +524,7 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind { ; X86-BASELINE-LABEL: clear_highbits64_c0: ; X86-BASELINE: # %bb.0: ; X86-BASELINE-NEXT: pushl %esi -; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BASELINE-NEXT: movl $-1, %eax ; X86-BASELINE-NEXT: movl $-1, %esi ; X86-BASELINE-NEXT: shrl %cl, %esi @@ -545,7 +545,7 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind { ; X86-BMI1-LABEL: clear_highbits64_c0: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %esi ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shrl %cl, %eax @@ -561,7 +561,7 @@ define i64 @clear_highbits64_c0(i64 %val, i64 %numhighbits) nounwind { ; X86-BMI2-LABEL: clear_highbits64_c0: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %eax ; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx @@ -597,7 +597,7 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind { ; X86-BASELINE-LABEL: clear_highbits64_c1_indexzext: ; X86-BASELINE: # %bb.0: ; X86-BASELINE-NEXT: pushl %esi -; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BASELINE-NEXT: movl $-1, %eax ; X86-BASELINE-NEXT: movl $-1, %esi ; X86-BASELINE-NEXT: shrl %cl, %esi @@ -618,7 +618,7 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind { ; X86-BMI1-LABEL: clear_highbits64_c1_indexzext: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %esi ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shrl %cl, %eax @@ -634,7 +634,7 @@ define i64 @clear_highbits64_c1_indexzext(i64 %val, i8 %numhighbits) nounwind { ; X86-BMI2-LABEL: clear_highbits64_c1_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %eax ; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx @@ -673,7 +673,7 @@ define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind { ; X86-BASELINE-NEXT: pushl %edi ; X86-BASELINE-NEXT: pushl %esi ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BASELINE-NEXT: movl $-1, %eax ; X86-BASELINE-NEXT: movl $-1, %edi ; X86-BASELINE-NEXT: shrl %cl, %edi @@ -697,7 +697,7 @@ define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind { ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %edi ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shrl %cl, %eax @@ -716,7 +716,7 @@ define i64 @clear_highbits64_c2_load(ptr %w, i64 %numhighbits) nounwind { ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax ; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx @@ -756,7 +756,7 @@ define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind ; X86-BASELINE-NEXT: pushl %edi ; X86-BASELINE-NEXT: pushl %esi ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BASELINE-NEXT: movl $-1, %eax ; X86-BASELINE-NEXT: movl $-1, %edi ; X86-BASELINE-NEXT: shrl %cl, %edi @@ -780,7 +780,7 @@ define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %edi ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shrl %cl, %eax @@ -799,7 +799,7 @@ define i64 @clear_highbits64_c3_load_indexzext(ptr %w, i8 %numhighbits) nounwind ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax ; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx @@ -838,7 +838,7 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind ; X86-BASELINE-LABEL: clear_highbits64_c4_commutative: ; X86-BASELINE: # %bb.0: ; X86-BASELINE-NEXT: pushl %esi -; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BASELINE-NEXT: movl $-1, %eax ; X86-BASELINE-NEXT: movl $-1, %esi ; X86-BASELINE-NEXT: shrl %cl, %esi @@ -859,7 +859,7 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind ; X86-BMI1-LABEL: clear_highbits64_c4_commutative: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %esi ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shrl %cl, %eax @@ -875,7 +875,7 @@ define i64 @clear_highbits64_c4_commutative(i64 %val, i64 %numhighbits) nounwind ; X86-BMI2-LABEL: clear_highbits64_c4_commutative: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %eax ; X86-BMI2-NEXT: shrxl %ecx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx @@ -915,7 +915,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numhighbits, ptr %escape) nounwind { ; X86-NOBMI2-LABEL: oneuse32_c: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: movl %eax, (%edx) @@ -925,7 +925,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numhighbits, ptr %escape) nounwind { ; X86-BMI2-LABEL: oneuse32_c: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shrxl %eax, %edx, %eax ; X86-BMI2-NEXT: movl %eax, (%ecx) @@ -960,7 +960,7 @@ define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind { ; X86-BASELINE: # %bb.0: ; X86-BASELINE-NEXT: pushl %esi ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BASELINE-NEXT: movl $-1, %eax ; X86-BASELINE-NEXT: movl $-1, %edx ; X86-BASELINE-NEXT: shrl %cl, %edx @@ -982,7 +982,7 @@ define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind { ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: movl $-1, %edi ; X86-BMI1-NEXT: shrl %cl, %edi @@ -1003,7 +1003,7 @@ define i64 @oneuse64_c(i64 %val, i64 %numhighbits, ptr %escape) nounwind { ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %eax ; X86-BMI2-NEXT: shrxl %ebx, %eax, %esi ; X86-BMI2-NEXT: xorl %edx, %edx @@ -1045,7 +1045,7 @@ define i32 @oneuse32_d(i32 %val, i32 %numhighbits, ptr %escape) nounwind { ; X86-NOBMI2-LABEL: oneuse32_d: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: movl %eax, (%edx) @@ -1055,7 +1055,7 @@ define i32 @oneuse32_d(i32 %val, i32 %numhighbits, ptr %escape) nounwind { ; X86-BMI2-LABEL: oneuse32_d: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shlxl %ecx, {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl %edx, (%eax) ; X86-BMI2-NEXT: shrxl %ecx, %edx, %eax @@ -1089,7 +1089,7 @@ define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind { ; X86-BASELINE-NEXT: pushl %ebx ; X86-BASELINE-NEXT: pushl %edi ; X86-BASELINE-NEXT: pushl %esi -; X86-BASELINE-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BASELINE-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BASELINE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BASELINE-NEXT: movl %edx, %edi @@ -1131,7 +1131,7 @@ define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind { ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl %edx, %eax @@ -1161,7 +1161,7 @@ define i64 @oneusei64_d(i64 %val, i64 %numhighbits, ptr %escape) nounwind { ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shldl %cl, %eax, %esi diff --git a/llvm/test/CodeGen/X86/clear-lowbits.ll b/llvm/test/CodeGen/X86/clear-lowbits.ll index 49ea2d0f1ed7af..1e28809cd43ebf 100644 --- a/llvm/test/CodeGen/X86/clear-lowbits.ll +++ b/llvm/test/CodeGen/X86/clear-lowbits.ll @@ -26,8 +26,8 @@ define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind { ; X86-LABEL: clear_lowbits8_c0: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: retl @@ -49,9 +49,9 @@ define i8 @clear_lowbits8_c0(i8 %val, i8 %numlowbits) nounwind { define i8 @clear_lowbits8_c2_load(ptr %w, i8 %numlowbits) nounwind { ; X86-LABEL: clear_lowbits8_c2_load: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: retl @@ -59,7 +59,7 @@ define i8 @clear_lowbits8_c2_load(ptr %w, i8 %numlowbits) nounwind { ; X64-LABEL: clear_lowbits8_c2_load: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: movb (%rdi), %al ; X64-NEXT: shrb %cl, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: shlb %cl, %al @@ -73,8 +73,8 @@ define i8 @clear_lowbits8_c2_load(ptr %w, i8 %numlowbits) nounwind { define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind { ; X86-LABEL: clear_lowbits8_c4_commutative: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: retl @@ -98,7 +98,7 @@ define i8 @clear_lowbits8_c4_commutative(i8 %val, i8 %numlowbits) nounwind { define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits16_c0: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -108,7 +108,7 @@ define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind { ; X86-BMI2-LABEL: clear_lowbits16_c0: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax @@ -139,7 +139,7 @@ define i16 @clear_lowbits16_c0(i16 %val, i16 %numlowbits) nounwind { define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits16_c1_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -149,7 +149,7 @@ define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind { ; X86-BMI2-LABEL: clear_lowbits16_c1_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax @@ -181,7 +181,7 @@ define i16 @clear_lowbits16_c1_indexzext(i16 %val, i8 %numlowbits) nounwind { define i16 @clear_lowbits16_c2_load(ptr %w, i16 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits16_c2_load: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzwl (%eax), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax @@ -191,7 +191,7 @@ define i16 @clear_lowbits16_c2_load(ptr %w, i16 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits16_c2_load: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movzwl (%ecx), %ecx ; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx @@ -225,7 +225,7 @@ define i16 @clear_lowbits16_c2_load(ptr %w, i16 %numlowbits) nounwind { define i16 @clear_lowbits16_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits16_c3_load_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movzwl (%eax), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax @@ -235,7 +235,7 @@ define i16 @clear_lowbits16_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits16_c3_load_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movzwl (%ecx), %ecx ; X86-BMI2-NEXT: shrxl %eax, %ecx, %ecx @@ -270,7 +270,7 @@ define i16 @clear_lowbits16_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits16_c4_commutative: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -280,7 +280,7 @@ define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind { ; X86-BMI2-LABEL: clear_lowbits16_c4_commutative: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: # kill: def $ax killed $ax killed $eax @@ -313,7 +313,7 @@ define i16 @clear_lowbits16_c4_commutative(i16 %val, i16 %numlowbits) nounwind { define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_c0: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -321,7 +321,7 @@ define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits32_c0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -348,7 +348,7 @@ define i32 @clear_lowbits32_c0(i32 %val, i32 %numlowbits) nounwind { define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_c1_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -356,7 +356,7 @@ define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits32_c1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -384,7 +384,7 @@ define i32 @clear_lowbits32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { define i32 @clear_lowbits32_c2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_c2_load: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl (%eax), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax @@ -394,7 +394,7 @@ define i32 @clear_lowbits32_c2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-BMI2-LABEL: clear_lowbits32_c2_load: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: retl @@ -422,7 +422,7 @@ define i32 @clear_lowbits32_c2_load(ptr %w, i32 %numlowbits) nounwind { define i32 @clear_lowbits32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_c3_load_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl (%eax), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax @@ -432,7 +432,7 @@ define i32 @clear_lowbits32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; X86-BMI2-LABEL: clear_lowbits32_c3_load_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: retl @@ -461,7 +461,7 @@ define i32 @clear_lowbits32_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_c4_commutative: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -469,7 +469,7 @@ define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits32_c4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -498,7 +498,7 @@ define i32 @clear_lowbits32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits64_c0: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -514,7 +514,7 @@ define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits64_c0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: testb $32, %cl @@ -549,7 +549,7 @@ define i64 @clear_lowbits64_c0(i64 %val, i64 %numlowbits) nounwind { define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits64_c1_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -565,7 +565,7 @@ define i64 @clear_lowbits64_c1_indexzext(i64 %val, i8 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits64_c1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: testb $32, %cl @@ -604,7 +604,7 @@ define i64 @clear_lowbits64_c2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -623,7 +623,7 @@ define i64 @clear_lowbits64_c2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax ; X86-BMI2-NEXT: testb $32, %bl @@ -662,7 +662,7 @@ define i64 @clear_lowbits64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -681,7 +681,7 @@ define i64 @clear_lowbits64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax ; X86-BMI2-NEXT: testb $32, %bl @@ -720,7 +720,7 @@ define i64 @clear_lowbits64_c3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits64_c4_commutative: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax @@ -736,7 +736,7 @@ define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: clear_lowbits64_c4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ecx, %edx, %eax ; X86-BMI2-NEXT: testb $32, %cl @@ -777,7 +777,7 @@ define i64 @clear_lowbits64_c4_commutative(i64 %val, i64 %numlowbits) nounwind { define i8 @clear_lowbits8_ic0(i8 %val, i8 %numlowbits) nounwind { ; X86-LABEL: clear_lowbits8_ic0: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movb $8, %cl ; X86-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shrb %cl, %al @@ -803,7 +803,7 @@ define i8 @clear_lowbits8_ic2_load(ptr %w, i8 %numlowbits) nounwind { ; X86-LABEL: clear_lowbits8_ic2_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: movb $8, %cl ; X86-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shrb %cl, %al @@ -812,7 +812,7 @@ define i8 @clear_lowbits8_ic2_load(ptr %w, i8 %numlowbits) nounwind { ; ; X64-LABEL: clear_lowbits8_ic2_load: ; X64: # %bb.0: -; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: movb (%rdi), %al ; X64-NEXT: movb $8, %cl ; X64-NEXT: subb %sil, %cl ; X64-NEXT: shrb %cl, %al @@ -828,7 +828,7 @@ define i8 @clear_lowbits8_ic2_load(ptr %w, i8 %numlowbits) nounwind { define i8 @clear_lowbits8_ic4_commutative(i8 %val, i8 %numlowbits) nounwind { ; X86-LABEL: clear_lowbits8_ic4_commutative: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movb $8, %cl ; X86-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shrb %cl, %al @@ -1613,7 +1613,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numlowbits, ptr %escape) nounwind { ; X86-NOBMI2-LABEL: oneuse32_c: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: movl %eax, (%edx) @@ -1623,7 +1623,7 @@ define i32 @oneuse32_c(i32 %val, i32 %numlowbits, ptr %escape) nounwind { ; X86-BMI2-LABEL: oneuse32_c: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %eax, %edx, %eax ; X86-BMI2-NEXT: movl %eax, (%ecx) @@ -1659,7 +1659,7 @@ define i64 @oneuse64(i64 %val, i64 %numlowbits, ptr %escape) nounwind { ; X86-NOBMI2-NEXT: pushl %edi ; X86-NOBMI2-NEXT: pushl %esi ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %edx ; X86-NOBMI2-NEXT: movl $-1, %edi ; X86-NOBMI2-NEXT: shll %cl, %edi @@ -1685,7 +1685,7 @@ define i64 @oneuse64(i64 %val, i64 %numlowbits, ptr %escape) nounwind { ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ebx, %edx, %esi ; X86-BMI2-NEXT: xorl %eax, %eax diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll index b66902fff3f1be..fe71e1f06d31b8 100644 --- a/llvm/test/CodeGen/X86/clz.ll +++ b/llvm/test/CodeGen/X86/clz.ll @@ -302,7 +302,7 @@ define i64 @ctlz_i64(i64 %x) { define i8 @ctlz_i8_zero_test(i8 %n) { ; X86-LABEL: ctlz_i8_zero_test: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: testb %al, %al ; X86-NEXT: je .LBB8_1 ; X86-NEXT: # %bb.2: # %cond.false @@ -512,7 +512,7 @@ define i64 @ctlz_i64_zero_test(i64 %n) { define i8 @cttz_i8_zero_test(i8 %n) { ; X86-LABEL: cttz_i8_zero_test: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: testb %al, %al ; X86-NEXT: je .LBB12_1 ; X86-NEXT: # %bb.2: # %cond.false @@ -819,7 +819,7 @@ define i32 @ctlz_bsr_zero_test(i32 %n) { define i8 @cttz_i8_knownbits(i8 %x) { ; X86-LABEL: cttz_i8_knownbits: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: orb $2, %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: bsfl %eax, %eax @@ -836,7 +836,7 @@ define i8 @cttz_i8_knownbits(i8 %x) { ; ; X86-CLZ-LABEL: cttz_i8_knownbits: ; X86-CLZ: # %bb.0: -; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-CLZ-NEXT: orb $2, %al ; X86-CLZ-NEXT: movzbl %al, %eax ; X86-CLZ-NEXT: tzcntl %eax, %eax @@ -859,7 +859,7 @@ define i8 @cttz_i8_knownbits(i8 %x) { define i8 @ctlz_i8_knownbits(i8 %x) { ; X86-LABEL: ctlz_i8_knownbits: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: orb $64, %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: bsrl %eax, %eax @@ -878,7 +878,7 @@ define i8 @ctlz_i8_knownbits(i8 %x) { ; ; X86-CLZ-LABEL: ctlz_i8_knownbits: ; X86-CLZ: # %bb.0: -; X86-CLZ-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-CLZ-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-CLZ-NEXT: orb $64, %al ; X86-CLZ-NEXT: movzbl %al, %eax ; X86-CLZ-NEXT: lzcntl %eax, %eax diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 94df5fa6d96fcf..2296ac5e4604a5 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -85,11 +85,11 @@ define i1 @test4() nounwind { ; CHECK-NEXT: xorb $1, %cl ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: sarl %cl, %edx -; CHECK-NEXT: movzbl g_96(%rip), %eax +; CHECK-NEXT: movb g_96(%rip), %al ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je .LBB3_2 ; CHECK-NEXT: # %bb.1: # %bb.i.i.i -; CHECK-NEXT: movzbl g_100(%rip), %ecx +; CHECK-NEXT: movb g_100(%rip), %cl ; CHECK-NEXT: .LBB3_2: # %func_4.exit.i ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: testb %dl, %dl @@ -102,7 +102,7 @@ define i1 @test4() nounwind { ; CHECK-NEXT: testb %bl, %bl ; CHECK-NEXT: jne .LBB3_5 ; CHECK-NEXT: # %bb.4: # %bb.i.i -; CHECK-NEXT: movzbl g_100(%rip), %ecx +; CHECK-NEXT: movb g_100(%rip), %cl ; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: .LBB3_5: # %func_1.exit diff --git a/llvm/test/CodeGen/X86/cmovcmov.ll b/llvm/test/CodeGen/X86/cmovcmov.ll index ab863dee69010e..8407df67c16457 100644 --- a/llvm/test/CodeGen/X86/cmovcmov.ll +++ b/llvm/test/CodeGen/X86/cmovcmov.ll @@ -339,7 +339,7 @@ define dso_local void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) nounwi ; NOCMOV-NEXT: movb %al, g8 ; NOCMOV-NEXT: retl ; NOCMOV-NEXT: .LBB7_1: # %entry -; NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl ; NOCMOV-NEXT: jg .LBB7_4 ; NOCMOV-NEXT: .LBB7_3: # %entry ; NOCMOV-NEXT: movl %ecx, %eax diff --git a/llvm/test/CodeGen/X86/combine-andintoload.ll b/llvm/test/CodeGen/X86/combine-andintoload.ll index caca66adc111ce..f81442330639fe 100644 --- a/llvm/test/CodeGen/X86/combine-andintoload.ll +++ b/llvm/test/CodeGen/X86/combine-andintoload.ll @@ -10,7 +10,7 @@ define zeroext i1 @bigger(ptr nocapture readonly %c, ptr nocapture readonly %e, ; CHECK-NEXT: movl $5, %r8d ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: shll %cl, %r8d -; CHECK-NEXT: movzbl (%rsi,%rdx), %eax +; CHECK-NEXT: movb (%rsi,%rdx), %al ; CHECK-NEXT: xorb (%rdi,%rdx), %al ; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: andl %r8d, %eax diff --git a/llvm/test/CodeGen/X86/combine-bswap.ll b/llvm/test/CodeGen/X86/combine-bswap.ll index 017dc960bd1718..e7b14d48b61da0 100644 --- a/llvm/test/CodeGen/X86/combine-bswap.ll +++ b/llvm/test/CodeGen/X86/combine-bswap.ll @@ -176,13 +176,13 @@ define void @demand_one_loaded_byte(ptr %xp, ptr %yp) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: movb 4(%ecx), %cl ; X86-NEXT: movb %cl, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: demand_one_loaded_byte: ; X64: # %bb.0: -; X64-NEXT: movzbl 4(%rdi), %eax +; X64-NEXT: movb 4(%rdi), %al ; X64-NEXT: movb %al, (%rsi) ; X64-NEXT: retq %x = load i64, ptr %xp, align 8 diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll index 10787dce3e7e29..8f4a716d71cae7 100644 --- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll @@ -13,7 +13,7 @@ define i8 @test_i8_7_mask_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_7_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $6, %al ; X86-NEXT: shrb %al ; X86-NEXT: retl @@ -33,7 +33,7 @@ define i8 @test_i8_7_mask_lshr_1(i8 %a0) { define i8 @test_i8_28_mask_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_28_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $28, %al ; X86-NEXT: shrb %al ; X86-NEXT: retl @@ -52,7 +52,7 @@ define i8 @test_i8_28_mask_lshr_1(i8 %a0) { define i8 @test_i8_28_mask_lshr_2(i8 %a0) { ; X86-LABEL: test_i8_28_mask_lshr_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $28, %al ; X86-NEXT: shrb $2, %al ; X86-NEXT: retl @@ -71,7 +71,7 @@ define i8 @test_i8_28_mask_lshr_2(i8 %a0) { define i8 @test_i8_28_mask_lshr_3(i8 %a0) { ; X86-LABEL: test_i8_28_mask_lshr_3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $24, %al ; X86-NEXT: shrb $3, %al ; X86-NEXT: retl @@ -90,7 +90,7 @@ define i8 @test_i8_28_mask_lshr_3(i8 %a0) { define i8 @test_i8_28_mask_lshr_4(i8 %a0) { ; X86-LABEL: test_i8_28_mask_lshr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $16, %al ; X86-NEXT: shrb $4, %al ; X86-NEXT: retl @@ -110,7 +110,7 @@ define i8 @test_i8_28_mask_lshr_4(i8 %a0) { define i8 @test_i8_224_mask_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $-32, %al ; X86-NEXT: shrb %al ; X86-NEXT: retl @@ -129,7 +129,7 @@ define i8 @test_i8_224_mask_lshr_1(i8 %a0) { define i8 @test_i8_224_mask_lshr_4(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $-32, %al ; X86-NEXT: shrb $4, %al ; X86-NEXT: retl @@ -148,7 +148,7 @@ define i8 @test_i8_224_mask_lshr_4(i8 %a0) { define i8 @test_i8_224_mask_lshr_5(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_5: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb $5, %al ; X86-NEXT: retl ; @@ -165,7 +165,7 @@ define i8 @test_i8_224_mask_lshr_5(i8 %a0) { define i8 @test_i8_224_mask_lshr_6(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb $6, %al ; X86-NEXT: retl ; @@ -185,7 +185,7 @@ define i8 @test_i8_224_mask_lshr_6(i8 %a0) { define i8 @test_i8_7_mask_ashr_1(i8 %a0) { ; X86-LABEL: test_i8_7_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $6, %al ; X86-NEXT: shrb %al ; X86-NEXT: retl @@ -205,7 +205,7 @@ define i8 @test_i8_7_mask_ashr_1(i8 %a0) { define i8 @test_i8_28_mask_ashr_1(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $28, %al ; X86-NEXT: shrb %al ; X86-NEXT: retl @@ -224,7 +224,7 @@ define i8 @test_i8_28_mask_ashr_1(i8 %a0) { define i8 @test_i8_28_mask_ashr_2(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $28, %al ; X86-NEXT: shrb $2, %al ; X86-NEXT: retl @@ -243,7 +243,7 @@ define i8 @test_i8_28_mask_ashr_2(i8 %a0) { define i8 @test_i8_28_mask_ashr_3(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $24, %al ; X86-NEXT: shrb $3, %al ; X86-NEXT: retl @@ -262,7 +262,7 @@ define i8 @test_i8_28_mask_ashr_3(i8 %a0) { define i8 @test_i8_28_mask_ashr_4(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $16, %al ; X86-NEXT: shrb $4, %al ; X86-NEXT: retl @@ -282,7 +282,7 @@ define i8 @test_i8_28_mask_ashr_4(i8 %a0) { define i8 @test_i8_224_mask_ashr_1(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $-32, %al ; X86-NEXT: sarb %al ; X86-NEXT: retl @@ -301,7 +301,7 @@ define i8 @test_i8_224_mask_ashr_1(i8 %a0) { define i8 @test_i8_224_mask_ashr_4(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $-32, %al ; X86-NEXT: sarb $4, %al ; X86-NEXT: retl @@ -320,7 +320,7 @@ define i8 @test_i8_224_mask_ashr_4(i8 %a0) { define i8 @test_i8_224_mask_ashr_5(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_5: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: sarb $5, %al ; X86-NEXT: retl ; @@ -337,7 +337,7 @@ define i8 @test_i8_224_mask_ashr_5(i8 %a0) { define i8 @test_i8_224_mask_ashr_6(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: sarb $6, %al ; X86-NEXT: retl ; @@ -357,7 +357,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) { define i8 @test_i8_7_mask_shl_1(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $7, %al ; X86-NEXT: addb %al, %al ; X86-NEXT: retl @@ -376,7 +376,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) { define i8 @test_i8_7_mask_shl_4(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $7, %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: retl @@ -395,7 +395,7 @@ define i8 @test_i8_7_mask_shl_4(i8 %a0) { define i8 @test_i8_7_mask_shl_5(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_5: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $5, %al ; X86-NEXT: retl ; @@ -412,7 +412,7 @@ define i8 @test_i8_7_mask_shl_5(i8 %a0) { define i8 @test_i8_7_mask_shl_6(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $6, %al ; X86-NEXT: retl ; @@ -430,7 +430,7 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) { define i8 @test_i8_28_mask_shl_1(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $28, %al ; X86-NEXT: addb %al, %al ; X86-NEXT: retl @@ -449,7 +449,7 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) { define i8 @test_i8_28_mask_shl_2(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $28, %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: retl @@ -468,7 +468,7 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) { define i8 @test_i8_28_mask_shl_3(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $28, %al ; X86-NEXT: shlb $3, %al ; X86-NEXT: retl @@ -487,7 +487,7 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) { define i8 @test_i8_28_mask_shl_4(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $12, %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: retl @@ -507,7 +507,7 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) { define i8 @test_i8_224_mask_shl_1(i8 %a0) { ; X86-LABEL: test_i8_224_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $96, %al ; X86-NEXT: addb %al, %al ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/copy-eflags.ll b/llvm/test/CodeGen/X86/copy-eflags.ll index cf72a34831371b..5dfe464b4067eb 100644 --- a/llvm/test/CodeGen/X86/copy-eflags.ll +++ b/llvm/test/CodeGen/X86/copy-eflags.ll @@ -18,7 +18,7 @@ declare dso_local void @external(i32) define dso_local i32 @test1() nounwind { ; X32-LABEL: test1: ; X32: # %bb.0: # %entry -; X32-NEXT: movzbl b, %ecx +; X32-NEXT: movb b, %cl ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: incb %al ; X32-NEXT: movb %al, b @@ -44,12 +44,12 @@ define dso_local i32 @test1() nounwind { ; X64-LABEL: test1: ; X64: # %bb.0: # %entry ; X64-NEXT: pushq %rax -; X64-NEXT: movzbl b(%rip), %ecx +; X64-NEXT: movb b(%rip), %cl ; X64-NEXT: leal 1(%rcx), %eax ; X64-NEXT: movb %al, b(%rip) ; X64-NEXT: incl c(%rip) ; X64-NEXT: sete %dl -; X64-NEXT: movzbl a(%rip), %esi +; X64-NEXT: movb a(%rip), %sil ; X64-NEXT: leal 1(%rsi), %edi ; X64-NEXT: cmpb %cl, %sil ; X64-NEXT: sete d(%rip) diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll index 1b73acbcb68282..cb91f109b0074c 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll @@ -966,7 +966,7 @@ define i32 @multiple_bb(i32 %x, i32 %y, ptr %divdst, i1 zeroext %store_srem, ptr ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: cltd diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll index 6643ada2f42b44..f38ed2e1bf84e5 100644 --- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll @@ -966,7 +966,7 @@ define i32 @multiple_bb(i32 %x, i32 %y, ptr %divdst, i1 zeroext %store_urem, ptr ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: xorl %edx, %edx diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll index 3958d5f85e20bb..6cb36ea5609b73 100644 --- a/llvm/test/CodeGen/X86/divide-by-constant.ll +++ b/llvm/test/CodeGen/X86/divide-by-constant.ll @@ -162,7 +162,7 @@ define i32 @test7(i32 %x) nounwind { define i8 @test8(i8 %x) nounwind { ; X32-LABEL: test8: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: shrb %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: imull $211, %eax, %eax @@ -185,7 +185,7 @@ define i8 @test8(i8 %x) nounwind { define i8 @test9(i8 %x) nounwind { ; X32-LABEL: test9: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: shrb $2, %al ; X32-NEXT: movzbl %al, %eax ; X32-NEXT: imull $71, %eax, %eax diff --git a/llvm/test/CodeGen/X86/divrem8_ext.ll b/llvm/test/CodeGen/X86/divrem8_ext.ll index c722b827cf7363..12dbe164626b05 100644 --- a/llvm/test/CodeGen/X86/divrem8_ext.ll +++ b/llvm/test/CodeGen/X86/divrem8_ext.ll @@ -49,7 +49,7 @@ define zeroext i8 @test_urem_zext_ah(i8 %x, i8 %y) { define i8 @test_urem_noext_ah(i8 %x, i8 %y) { ; X32-LABEL: test_urem_noext_ah: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: divb %cl ; X32-NEXT: movzbl %ah, %eax @@ -137,7 +137,7 @@ define signext i8 @test_srem_sext_ah(i8 %x, i8 %y) { define i8 @test_srem_noext_ah(i8 %x, i8 %y) { ; X32-LABEL: test_srem_noext_ah: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: idivb %cl ; X32-NEXT: movsbl %ah, %eax diff --git a/llvm/test/CodeGen/X86/emutls.ll b/llvm/test/CodeGen/X86/emutls.ll index a7fbc2d8531d3e..48bd2ae6240626 100644 --- a/llvm/test/CodeGen/X86/emutls.ll +++ b/llvm/test/CodeGen/X86/emutls.ll @@ -211,7 +211,7 @@ define dso_local i8 @f13() { ; X86-LABEL: f13: ; X86: movl $__emutls_v.b1, (%esp) ; X86-NEXT: calll __emutls_get_address -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: addl $12, %esp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll index 43d2ad4299b88a..c056a74cc183c8 100644 --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -34,8 +34,8 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr32_a0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi ; X86-NOBMI-NEXT: movl $1, %eax @@ -48,7 +48,7 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bextr32_a0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -57,8 +57,8 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bextr32_a0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -99,8 +99,8 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n ; X86-NOBMI-LABEL: bextr32_a0_arithmetic: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: sarl %cl, %esi ; X86-NOBMI-NEXT: movl $1, %eax @@ -113,8 +113,8 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n ; ; X86-BMI1-LABEL: bextr32_a0_arithmetic: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: sarl %cl, %edx ; X86-BMI1-NEXT: shll $8, %eax @@ -123,8 +123,8 @@ define i32 @bextr32_a0_arithmetic(i32 %val, i32 %numskipbits, i32 %numlowbits) n ; ; X86-BMI2-LABEL: bextr32_a0_arithmetic: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: sarxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -166,8 +166,8 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-LABEL: bextr32_a1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi ; X86-NOBMI-NEXT: movl $1, %eax @@ -180,7 +180,7 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; ; X86-BMI1-LABEL: bextr32_a1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -189,8 +189,8 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; ; X86-BMI2-LABEL: bextr32_a1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -233,8 +233,8 @@ define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr32_a2_load: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi @@ -249,7 +249,7 @@ define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1-LABEL: bextr32_a2_load: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: orl %ecx, %edx @@ -258,9 +258,9 @@ define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; ; X86-BMI2-LABEL: bextr32_a2_load: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -303,8 +303,8 @@ define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; X86-NOBMI-LABEL: bextr32_a3_load_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi @@ -319,7 +319,7 @@ define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; X86-BMI1-LABEL: bextr32_a3_load_indexzext: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: orl %ecx, %edx @@ -328,9 +328,9 @@ define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; ; X86-BMI2-LABEL: bextr32_a3_load_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -375,8 +375,8 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-LABEL: bextr32_a4_commutative: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi ; X86-NOBMI-NEXT: movl $1, %eax @@ -389,7 +389,7 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; ; X86-BMI1-LABEL: bextr32_a4_commutative: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -398,8 +398,8 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; ; X86-BMI2-LABEL: bextr32_a4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -442,7 +442,7 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl %eax, %ecx @@ -464,7 +464,7 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $8, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl %al, %edx @@ -481,7 +481,7 @@ define i32 @bextr32_a5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $8, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: bzhil %eax, %edx, %esi @@ -619,8 +619,8 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %esi @@ -763,8 +763,8 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) n ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %esi @@ -907,8 +907,8 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %esi @@ -1055,8 +1055,8 @@ define i64 @bextr64_a2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl (%eax), %esi ; X86-BMI2-NEXT: movl 4(%eax), %eax @@ -1202,8 +1202,8 @@ define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl (%eax), %esi ; X86-BMI2-NEXT: movl 4(%eax), %eax @@ -1351,8 +1351,8 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -1423,7 +1423,7 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1473,7 +1473,7 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $12, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1523,7 +1523,7 @@ define i64 @bextr64_a5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $12, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -1620,8 +1620,8 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %esi @@ -1651,8 +1651,8 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %esi @@ -1681,8 +1681,8 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_a0: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %edx @@ -1744,8 +1744,8 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %esi @@ -1769,8 +1769,8 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -1790,8 +1790,8 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_a1: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -1846,8 +1846,8 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl %edx, %esi @@ -1875,8 +1875,8 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: pushl %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl %edx, %esi @@ -1901,8 +1901,8 @@ define i32 @bextr64_32_a1_trunc_extrause(i64 %val, i64 %numskipbits, i32 %numlow ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %esi @@ -1990,8 +1990,8 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %esi @@ -2015,8 +2015,8 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -2036,8 +2036,8 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_a2: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -2092,8 +2092,8 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %esi @@ -2123,8 +2123,8 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %esi @@ -2153,8 +2153,8 @@ define i32 @bextr64_32_a3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_a3: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %edx @@ -2218,8 +2218,8 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr32_b0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi ; X86-NOBMI-NEXT: movl $-1, %eax @@ -2232,7 +2232,7 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bextr32_b0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -2241,8 +2241,8 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bextr32_b0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -2283,8 +2283,8 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-NOBMI-LABEL: bextr32_b1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi ; X86-NOBMI-NEXT: movl $-1, %eax @@ -2297,7 +2297,7 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; ; X86-BMI1-LABEL: bextr32_b1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -2306,8 +2306,8 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext % ; ; X86-BMI2-LABEL: bextr32_b1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -2350,8 +2350,8 @@ define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr32_b2_load: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi @@ -2366,7 +2366,7 @@ define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1-LABEL: bextr32_b2_load: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: orl %ecx, %edx @@ -2375,9 +2375,9 @@ define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; ; X86-BMI2-LABEL: bextr32_b2_load: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -2420,8 +2420,8 @@ define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; X86-NOBMI-LABEL: bextr32_b3_load_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi @@ -2436,7 +2436,7 @@ define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; X86-BMI1-LABEL: bextr32_b3_load_indexzext: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: orl %ecx, %edx @@ -2445,9 +2445,9 @@ define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; ; X86-BMI2-LABEL: bextr32_b3_load_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -2492,8 +2492,8 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-LABEL: bextr32_b4_commutative: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: shrl %cl, %esi ; X86-NOBMI-NEXT: movl $-1, %eax @@ -2506,7 +2506,7 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; ; X86-BMI1-LABEL: bextr32_b4_commutative: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -2515,8 +2515,8 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; ; X86-BMI2-LABEL: bextr32_b4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -2559,7 +2559,7 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl %eax, %ecx @@ -2581,7 +2581,7 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $8, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl %al, %edx @@ -2598,7 +2598,7 @@ define i32 @bextr32_b5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $8, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: bzhil %eax, %edx, %esi @@ -2704,8 +2704,8 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -2738,8 +2738,8 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -2842,8 +2842,8 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -2876,8 +2876,8 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext % ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -2985,8 +2985,8 @@ define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl (%edx), %esi ; X86-BMI1-NEXT: movl 4(%edx), %edi @@ -3020,8 +3020,8 @@ define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl (%edx), %eax ; X86-BMI2-NEXT: movl 4(%edx), %esi @@ -3128,8 +3128,8 @@ define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl (%edx), %esi ; X86-BMI1-NEXT: movl 4(%edx), %edi @@ -3163,8 +3163,8 @@ define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroex ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl (%edx), %eax ; X86-BMI2-NEXT: movl 4(%edx), %esi @@ -3274,8 +3274,8 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -3308,8 +3308,8 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -3426,7 +3426,7 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $12, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -3473,7 +3473,7 @@ define i64 @bextr64_b5_skipextrauses(i64 %val, i64 %numskipbits, i64 %numlowbits ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $12, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx @@ -3565,8 +3565,8 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %eax @@ -3596,8 +3596,8 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -3625,8 +3625,8 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI2-LABEL: bextr64_32_b0: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -3688,8 +3688,8 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %esi @@ -3713,8 +3713,8 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -3734,8 +3734,8 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI2-LABEL: bextr64_32_b1: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -3790,8 +3790,8 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %esi @@ -3815,8 +3815,8 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -3836,8 +3836,8 @@ define i32 @bextr64_32_b2(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI2-LABEL: bextr64_32_b2: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -3893,8 +3893,8 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl %edi, %eax @@ -3924,8 +3924,8 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -3953,8 +3953,8 @@ define i32 @bextr64_32_b3(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; X86-BMI2-LABEL: bextr64_32_b3: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -4021,7 +4021,7 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: xorl %ecx, %ecx @@ -4043,7 +4043,7 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: pushl %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: shrl %cl, %edi ; X86-BMI1-NEXT: xorl %ecx, %ecx @@ -4065,8 +4065,8 @@ define i32 @bextr32_c0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl %ebx, %eax ; X86-BMI2-NEXT: negb %al @@ -4155,7 +4155,7 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: xorl %ecx, %ecx @@ -4177,7 +4177,7 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: pushl %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: shrl %cl, %edi ; X86-BMI1-NEXT: xorl %ecx, %ecx @@ -4199,8 +4199,8 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl %ebx, %eax ; X86-BMI2-NEXT: negb %al @@ -4291,7 +4291,7 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %edi ; X86-NOBMI-NEXT: shrl %cl, %edi @@ -4314,7 +4314,7 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: pushl %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl (%eax), %edi ; X86-BMI1-NEXT: shrl %cl, %edi @@ -4337,9 +4337,9 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi ; X86-BMI2-NEXT: movl %ebx, %eax ; X86-BMI2-NEXT: negb %al @@ -4429,7 +4429,7 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %edi ; X86-NOBMI-NEXT: shrl %cl, %edi @@ -4452,7 +4452,7 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: pushl %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl (%eax), %edi ; X86-BMI1-NEXT: shrl %cl, %edi @@ -4475,9 +4475,9 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %esi ; X86-BMI2-NEXT: movl %ebx, %eax ; X86-BMI2-NEXT: negb %al @@ -4569,7 +4569,7 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: pushl %eax -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: shrl %cl, %edi ; X86-NOBMI-NEXT: xorl %ecx, %ecx @@ -4591,7 +4591,7 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: pushl %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: shrl %cl, %edi ; X86-BMI1-NEXT: xorl %ecx, %ecx @@ -4613,8 +4613,8 @@ define i32 @bextr32_c4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: pushl %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl %ebx, %eax ; X86-BMI2-NEXT: negb %al @@ -4758,7 +4758,7 @@ define i32 @bextr32_c5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $16, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI2-NEXT: shrxl %edi, {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl %ebx, %eax @@ -4865,7 +4865,7 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl %eax, %edi @@ -4911,7 +4911,7 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $12, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl %eax, %edi @@ -4957,7 +4957,7 @@ define i64 @bextr64_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $12, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %esi @@ -5071,7 +5071,7 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl %eax, %edi @@ -5117,7 +5117,7 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $12, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl %eax, %edi @@ -5163,7 +5163,7 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $12, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %esi @@ -5280,7 +5280,7 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %esi ; X86-NOBMI-NEXT: movl 4(%eax), %eax @@ -5327,7 +5327,7 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $12, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl (%eax), %esi ; X86-BMI1-NEXT: movl 4(%eax), %eax @@ -5374,7 +5374,7 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $12, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl (%eax), %esi ; X86-BMI2-NEXT: movl 4(%eax), %eax @@ -5490,7 +5490,7 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %esi ; X86-NOBMI-NEXT: movl 4(%eax), %eax @@ -5537,7 +5537,7 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $12, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl (%eax), %esi ; X86-BMI1-NEXT: movl 4(%eax), %eax @@ -5584,7 +5584,7 @@ define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $12, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl (%eax), %esi ; X86-BMI2-NEXT: movl 4(%eax), %eax @@ -5703,7 +5703,7 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: subl $12, %esp -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl %eax, %edi @@ -5749,7 +5749,7 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $12, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl %eax, %edi @@ -5795,7 +5795,7 @@ define i64 @bextr64_c4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $12, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %esi @@ -6138,7 +6138,7 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr64_32_c0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl %esi, %edx @@ -6165,7 +6165,7 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1-LABEL: bextr64_32_c0: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl %esi, %edx @@ -6191,7 +6191,7 @@ define i32 @bextr64_32_c0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; ; X86-BMI2-LABEL: bextr64_32_c0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %edx @@ -6251,7 +6251,7 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr64_32_c1: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl %esi, %eax @@ -6274,8 +6274,8 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -6295,8 +6295,8 @@ define i32 @bextr64_32_c1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_c1: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -6350,7 +6350,7 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr64_32_c2: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl %esi, %eax @@ -6373,8 +6373,8 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -6394,8 +6394,8 @@ define i32 @bextr64_32_c2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_c2: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx @@ -6450,7 +6450,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr64_32_c3: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl %esi, %edx @@ -6478,7 +6478,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1-LABEL: bextr64_32_c3: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl %esi, %edx @@ -6506,7 +6506,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_c3: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrdl %cl, %eax, %edx @@ -6579,7 +6579,7 @@ define i32 @bextr64_32_c3(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr32_d0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: xorl %ecx, %ecx @@ -6591,7 +6591,7 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bextr32_d0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -6600,8 +6600,8 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bextr32_d0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -6641,7 +6641,7 @@ define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr32_d1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: xorl %ecx, %ecx @@ -6653,7 +6653,7 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; ; X86-BMI1-LABEL: bextr32_d1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: orl %eax, %ecx @@ -6662,8 +6662,8 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun ; ; X86-BMI2-LABEL: bextr32_d1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -6705,7 +6705,7 @@ define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) noun define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr32_d2_load: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax ; X86-NOBMI-NEXT: shrl %cl, %eax @@ -6719,7 +6719,7 @@ define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1-LABEL: bextr32_d2_load: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: orl %ecx, %edx @@ -6728,9 +6728,9 @@ define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind ; ; X86-BMI2-LABEL: bextr32_d2_load: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -6771,7 +6771,7 @@ define i32 @bextr32_d2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bextr32_d3_load_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax ; X86-NOBMI-NEXT: shrl %cl, %eax @@ -6785,7 +6785,7 @@ define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-BMI1-LABEL: bextr32_d3_load_indexzext: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: orl %ecx, %edx @@ -6794,9 +6794,9 @@ define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; ; X86-BMI2-LABEL: bextr32_d3_load_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: shrxl %edx, (%ecx), %ecx ; X86-BMI2-NEXT: bzhil %eax, %ecx, %eax ; X86-BMI2-NEXT: retl @@ -6861,7 +6861,7 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: subl $8, %esp -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: movzbl %al, %edx @@ -6878,7 +6878,7 @@ define i32 @bextr32_d5_skipextrauses(i32 %val, i32 %numskipbits, i32 %numlowbits ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: subl $8, %esp -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shrxl %ecx, {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: bzhil %eax, %edx, %esi @@ -6944,7 +6944,7 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl %edx, %eax @@ -6992,7 +6992,7 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl %edx, %eax @@ -7039,7 +7039,7 @@ define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -7114,7 +7114,7 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl %edx, %eax @@ -7162,7 +7162,7 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl %edx, %eax @@ -7209,7 +7209,7 @@ define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) noun ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -7288,7 +7288,7 @@ define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %edi ; X86-NOBMI-NEXT: movl 4(%eax), %edx @@ -7337,7 +7337,7 @@ define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl (%eax), %edi ; X86-BMI1-NEXT: movl 4(%eax), %edx @@ -7385,7 +7385,7 @@ define i64 @bextr64_d2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl (%edx), %eax ; X86-BMI2-NEXT: movl 4(%edx), %edx @@ -7462,7 +7462,7 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-NOBMI-NEXT: pushl %ebx ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %edi ; X86-NOBMI-NEXT: movl 4(%eax), %edx @@ -7511,7 +7511,7 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-BMI1-NEXT: pushl %ebx ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl (%eax), %edi ; X86-BMI1-NEXT: movl 4(%eax), %edx @@ -7559,7 +7559,7 @@ define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) n ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %edi ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl (%edx), %eax ; X86-BMI2-NEXT: movl 4(%edx), %edx @@ -7863,7 +7863,7 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr64_32_d0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl %esi, %eax @@ -7898,7 +7898,7 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; X86-BMI1-LABEL: bextr64_32_d0: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl %esi, %eax @@ -7932,7 +7932,7 @@ define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind ; ; X86-BMI2-LABEL: bextr64_32_d0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -8002,7 +8002,7 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-NOBMI-LABEL: bextr64_32_d1: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NOBMI-NEXT: movl %esi, %eax @@ -8025,8 +8025,8 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %edi ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-BMI1-NEXT: movl %edi, %edx @@ -8046,8 +8046,8 @@ define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind ; X86-BMI2-LABEL: bextr64_32_d1: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-BMI2-NEXT: shrdl %cl, %esi, %edx diff --git a/llvm/test/CodeGen/X86/extract-insert.ll b/llvm/test/CodeGen/X86/extract-insert.ll index 23d66b2d77f35f..8960fe96bf2b10 100644 --- a/llvm/test/CodeGen/X86/extract-insert.ll +++ b/llvm/test/CodeGen/X86/extract-insert.ll @@ -15,7 +15,7 @@ define i32 @extractelt_undef_insertelt(i32 %x, i32 %y) { define i8 @extractelt_bitcast(i32 %x) nounwind { ; X86-LABEL: extractelt_bitcast: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; ; X64-LABEL: extractelt_bitcast: @@ -87,7 +87,7 @@ define i16 @trunc_i64_to_i16_le(i64 %x) { define i8 @trunc_i32_to_i8_le(i32 %x) { ; X86-LABEL: trunc_i32_to_i8_le: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; ; X64-LABEL: trunc_i32_to_i8_le: diff --git a/llvm/test/CodeGen/X86/extract-lowbits.ll b/llvm/test/CodeGen/X86/extract-lowbits.ll index 823e26d63e15a4..0a4722b34369f5 100644 --- a/llvm/test/CodeGen/X86/extract-lowbits.ll +++ b/llvm/test/CodeGen/X86/extract-lowbits.ll @@ -30,7 +30,7 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_a0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: decl %eax @@ -39,14 +39,14 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_a0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_a0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -79,7 +79,7 @@ define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind { define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_a1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: decl %eax @@ -88,14 +88,14 @@ define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_a1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_a1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -130,7 +130,7 @@ define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_a2_load: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: decl %eax @@ -140,7 +140,7 @@ define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-BMI1-LABEL: bzhi32_a2_load: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax ; X86-BMI1-NEXT: retl @@ -148,7 +148,7 @@ define i32 @bzhi32_a2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-BMI2-LABEL: bzhi32_a2_load: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax ; X86-BMI2-NEXT: retl ; @@ -183,7 +183,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_a3_load_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: decl %eax @@ -193,7 +193,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI1-LABEL: bzhi32_a3_load_indexzext: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax ; X86-BMI1-NEXT: retl @@ -201,7 +201,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI2-LABEL: bzhi32_a3_load_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax ; X86-BMI2-NEXT: retl ; @@ -236,7 +236,7 @@ define i32 @bzhi32_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_a4_commutative: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: decl %eax @@ -245,14 +245,14 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_a4_commutative: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_a4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -287,7 +287,7 @@ define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind { define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx @@ -306,7 +306,7 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_a0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %eax ; X86-BMI1-NEXT: xorl %edx, %edx ; X86-BMI1-NEXT: shldl %cl, %eax, %edx @@ -325,7 +325,7 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_a0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $1, %eax ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: shldl %cl, %eax, %edx @@ -371,7 +371,7 @@ define i64 @bzhi64_a0(i64 %val, i64 %numlowbits) nounwind { define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx @@ -390,7 +390,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_a1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %eax ; X86-BMI1-NEXT: xorl %edx, %edx ; X86-BMI1-NEXT: shldl %cl, %eax, %edx @@ -409,7 +409,7 @@ define i64 @bzhi64_a1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_a1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $1, %eax ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: shldl %cl, %eax, %edx @@ -460,7 +460,7 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx @@ -482,7 +482,7 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %eax ; X86-BMI1-NEXT: xorl %edx, %edx ; X86-BMI1-NEXT: shldl %cl, %eax, %edx @@ -504,7 +504,7 @@ define i64 @bzhi64_a2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $1, %eax ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: shldl %cl, %eax, %edx @@ -554,7 +554,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx @@ -576,7 +576,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %eax ; X86-BMI1-NEXT: xorl %edx, %edx ; X86-BMI1-NEXT: shldl %cl, %eax, %edx @@ -598,7 +598,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $1, %eax ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: shldl %cl, %eax, %edx @@ -649,7 +649,7 @@ define i64 @bzhi64_a3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_a4_commutative: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: xorl %edx, %edx ; X86-NOBMI-NEXT: shldl %cl, %eax, %edx @@ -668,7 +668,7 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_a4_commutative: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %eax ; X86-BMI1-NEXT: xorl %edx, %edx ; X86-BMI1-NEXT: shldl %cl, %eax, %edx @@ -687,7 +687,7 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_a4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $1, %eax ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: shldl %cl, %eax, %edx @@ -736,7 +736,7 @@ define i64 @bzhi64_a4_commutative(i64 %val, i64 %numlowbits) nounwind { define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_a0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %edx ; X86-NOBMI-NEXT: shll %cl, %edx ; X86-NOBMI-NEXT: xorl %eax, %eax @@ -751,7 +751,7 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_a0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %edx ; X86-BMI1-NEXT: shll %cl, %edx ; X86-BMI1-NEXT: xorl %eax, %eax @@ -766,7 +766,7 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_32_a0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: jne .LBB10_2 @@ -810,7 +810,7 @@ define i32 @bzhi64_32_a0(i64 %val, i64 %numlowbits) nounwind { define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_a1: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: decl %eax @@ -819,14 +819,14 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_a1: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi64_32_a1: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -862,7 +862,7 @@ define i32 @bzhi64_32_a1(i64 %val, i32 %numlowbits) nounwind { define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_a1_trunc_extrause: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl %edx, (%eax) @@ -874,7 +874,7 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) ; ; X86-BMI1-LABEL: bzhi64_32_a1_trunc_extrause: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl %ecx, (%edx) @@ -884,7 +884,7 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) ; ; X86-BMI2-LABEL: bzhi64_32_a1_trunc_extrause: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movl %ecx, (%edx) @@ -927,7 +927,7 @@ define i32 @bzhi64_32_a1_trunc_extrause(i64 %val, i32 %numlowbits, ptr %escape) define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_a2: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: decl %eax @@ -936,14 +936,14 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_a2: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi64_32_a2: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -980,7 +980,7 @@ define i32 @bzhi64_32_a2(i64 %val, i32 %numlowbits) nounwind { define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_a3: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $1, %edx ; X86-NOBMI-NEXT: shll %cl, %edx ; X86-NOBMI-NEXT: xorl %eax, %eax @@ -995,7 +995,7 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_a3: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %edx ; X86-BMI1-NEXT: shll %cl, %edx ; X86-BMI1-NEXT: xorl %eax, %eax @@ -1010,7 +1010,7 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_32_a3: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: jne .LBB14_2 @@ -1057,7 +1057,7 @@ define i32 @bzhi64_32_a3(i64 %val, i64 %numlowbits) nounwind { define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_b0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: notl %eax @@ -1066,14 +1066,14 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_b0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_b0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -1106,7 +1106,7 @@ define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind { define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_b1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: notl %eax @@ -1115,14 +1115,14 @@ define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_b1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_b1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -1157,7 +1157,7 @@ define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_b2_load: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: notl %eax @@ -1167,7 +1167,7 @@ define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-BMI1-LABEL: bzhi32_b2_load: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax ; X86-BMI1-NEXT: retl @@ -1175,7 +1175,7 @@ define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-BMI2-LABEL: bzhi32_b2_load: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax ; X86-BMI2-NEXT: retl ; @@ -1210,7 +1210,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_b3_load_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: notl %eax @@ -1220,7 +1220,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI1-LABEL: bzhi32_b3_load_indexzext: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax ; X86-BMI1-NEXT: retl @@ -1228,7 +1228,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI2-LABEL: bzhi32_b3_load_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax ; X86-BMI2-NEXT: retl ; @@ -1263,7 +1263,7 @@ define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_b4_commutative: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: notl %eax @@ -1272,14 +1272,14 @@ define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_b4_commutative: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_b4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -1315,7 +1315,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_b0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: shll %cl, %esi @@ -1337,7 +1337,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_b0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %edx ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shll %cl, %eax @@ -1353,7 +1353,7 @@ define i64 @bzhi64_b0(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_b0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: movl $-1, %ecx ; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax ; X86-BMI2-NEXT: testb $32, %dl @@ -1396,7 +1396,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_b1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: shll %cl, %esi @@ -1418,7 +1418,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_b1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %edx ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shll %cl, %eax @@ -1434,7 +1434,7 @@ define i64 @bzhi64_b1_indexzext(i64 %val, i8 zeroext %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_b1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: movl $-1, %ecx ; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax ; X86-BMI2-NEXT: testb $32, %dl @@ -1482,7 +1482,7 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %edi ; X86-NOBMI-NEXT: shll %cl, %edi @@ -1507,7 +1507,7 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %esi ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shll %cl, %eax @@ -1526,7 +1526,7 @@ define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax ; X86-BMI2-NEXT: testb $32, %bl @@ -1573,7 +1573,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-NOBMI-NEXT: pushl %edi ; X86-NOBMI-NEXT: pushl %esi ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %edi ; X86-NOBMI-NEXT: shll %cl, %edi @@ -1598,7 +1598,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %esi ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shll %cl, %eax @@ -1617,7 +1617,7 @@ define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %ebx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ebx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %bl ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %ebx, %edx, %eax ; X86-BMI2-NEXT: testb $32, %bl @@ -1665,7 +1665,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_b4_commutative: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: pushl %esi -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: movl $-1, %esi ; X86-NOBMI-NEXT: shll %cl, %esi @@ -1687,7 +1687,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_b4_commutative: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %edx ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shll %cl, %eax @@ -1703,7 +1703,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_b4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: movl $-1, %ecx ; X86-BMI2-NEXT: shlxl %edx, %ecx, %eax ; X86-BMI2-NEXT: testb $32, %dl @@ -1748,7 +1748,7 @@ define i64 @bzhi64_b4_commutative(i64 %val, i64 %numlowbits) nounwind { define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_b0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shll %cl, %edx ; X86-NOBMI-NEXT: xorl %eax, %eax @@ -1763,7 +1763,7 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_b0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: xorl %edx, %edx @@ -1777,7 +1777,7 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_32_b0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI2-NEXT: testb $32, %al ; X86-BMI2-NEXT: jne .LBB25_2 @@ -1821,7 +1821,7 @@ define i32 @bzhi64_32_b0(i64 %val, i8 %numlowbits) nounwind { define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_b1: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: notl %eax @@ -1830,14 +1830,14 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_b1: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi64_32_b1: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -1874,7 +1874,7 @@ define i32 @bzhi64_32_b1(i64 %val, i8 %numlowbits) nounwind { define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_b2: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: notl %eax @@ -1883,14 +1883,14 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_b2: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi64_32_b2: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -1928,7 +1928,7 @@ define i32 @bzhi64_32_b2(i64 %val, i8 %numlowbits) nounwind { define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi64_32_b3: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %edx ; X86-NOBMI-NEXT: shll %cl, %edx ; X86-NOBMI-NEXT: xorl %eax, %eax @@ -1943,7 +1943,7 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_b3: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1, %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: xorl %edx, %edx @@ -1957,7 +1957,7 @@ define i32 @bzhi64_32_b3(i64 %val, i8 %numlowbits) nounwind { ; ; X86-BMI2-LABEL: bzhi64_32_b3: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI2-NEXT: testb $32, %al ; X86-BMI2-NEXT: jne .LBB28_2 @@ -2031,7 +2031,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits, ptr %escape) nounwind { ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx ; X86-BMI2-NEXT: negb %dl @@ -2108,7 +2108,7 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr %escape) nounwind ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx ; X86-BMI2-NEXT: negb %dl @@ -2195,7 +2195,7 @@ define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits, ptr %escape) nounwind { ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax ; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx ; X86-BMI2-NEXT: negb %dl @@ -2284,7 +2284,7 @@ define i32 @bzhi32_c3_load_indexzext(ptr %w, i8 %numlowbits, ptr %escape) nounwi ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: bzhil %edx, (%eax), %eax ; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx ; X86-BMI2-NEXT: negb %dl @@ -2365,7 +2365,7 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr %escape) nounwi ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-BMI2-NEXT: bzhil %edx, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: # kill: def $dl killed $dl killed $edx def $edx ; X86-BMI2-NEXT: negb %dl @@ -3070,14 +3070,14 @@ define i32 @bzhi64_32_c1(i64 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_c1: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi64_32_c1: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -3124,14 +3124,14 @@ define i32 @bzhi64_32_c2(i64 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_c2: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi64_32_c2: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -3236,14 +3236,14 @@ define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_d0: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_d0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -3286,14 +3286,14 @@ define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi32_d1_indexzext: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi32_d1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; @@ -3339,7 +3339,7 @@ define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-BMI1-LABEL: bzhi32_d2_load: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax ; X86-BMI1-NEXT: retl @@ -3347,7 +3347,7 @@ define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind { ; X86-BMI2-LABEL: bzhi32_d2_load: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax ; X86-BMI2-NEXT: retl ; @@ -3393,7 +3393,7 @@ define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; X86-BMI1-LABEL: bzhi32_d3_load_indexzext: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: shll $8, %ecx ; X86-BMI1-NEXT: bextrl %ecx, (%eax), %eax ; X86-BMI1-NEXT: retl @@ -3401,7 +3401,7 @@ define i32 @bzhi32_d3_load_indexzext(ptr %w, i8 %numlowbits) nounwind { ; X86-BMI2-LABEL: bzhi32_d3_load_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: bzhil %ecx, (%eax), %eax ; X86-BMI2-NEXT: retl ; @@ -4119,14 +4119,14 @@ define i32 @bzhi64_32_d1(i64 %val, i32 %numlowbits) nounwind { ; ; X86-BMI1-LABEL: bzhi64_32_d1: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI1-NEXT: shll $8, %eax ; X86-BMI1-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: bzhi64_32_d1: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: bzhil %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/extractelement-index.ll b/llvm/test/CodeGen/X86/extractelement-index.ll index 077351b9718d5f..99297b435b2a98 100644 --- a/llvm/test/CodeGen/X86/extractelement-index.ll +++ b/llvm/test/CodeGen/X86/extractelement-index.ll @@ -427,14 +427,14 @@ define i8 @extractelement_v16i8_var(<16 x i8> %a, i256 %i) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: andl $15, %edi ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movzbl -24(%rsp,%rdi), %eax +; SSE-NEXT: movb -24(%rsp,%rdi), %al ; SSE-NEXT: retq ; ; AVX-LABEL: extractelement_v16i8_var: ; AVX: # %bb.0: ; AVX-NEXT: andl $15, %edi ; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) -; AVX-NEXT: movzbl -24(%rsp,%rdi), %eax +; AVX-NEXT: movb -24(%rsp,%rdi), %al ; AVX-NEXT: retq %b = extractelement <16 x i8> %a, i256 %i ret i8 %b @@ -446,7 +446,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind { ; SSE-NEXT: andl $31, %edi ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; SSE-NEXT: movzbl -40(%rsp,%rdi), %eax +; SSE-NEXT: movb -40(%rsp,%rdi), %al ; SSE-NEXT: retq ; ; AVX-LABEL: extractelement_v32i8_var: @@ -457,7 +457,7 @@ define i8 @extractelement_v32i8_var(<32 x i8> %a, i256 %i) nounwind { ; AVX-NEXT: subq $64, %rsp ; AVX-NEXT: andl $31, %edi ; AVX-NEXT: vmovaps %ymm0, (%rsp) -; AVX-NEXT: movzbl (%rsp,%rdi), %eax +; AVX-NEXT: movb (%rsp,%rdi), %al ; AVX-NEXT: movq %rbp, %rsp ; AVX-NEXT: popq %rbp ; AVX-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/fast-isel-call-bool.ll b/llvm/test/CodeGen/X86/fast-isel-call-bool.ll index b9a5cecdf64496..43ea84bfaf0855 100644 --- a/llvm/test/CodeGen/X86/fast-isel-call-bool.ll +++ b/llvm/test/CodeGen/X86/fast-isel-call-bool.ll @@ -9,7 +9,7 @@ define i64 @foo(ptr %arg) { ; CHECK-LABEL: foo: top: %0 = load i8, ptr %arg -; CHECK: movzbl +; CHECK: movb %1 = trunc i8 %0 to i1 ; CHECK: andb $1, %2 = call i64 @bar(i1 %1) diff --git a/llvm/test/CodeGen/X86/fast-isel-i1.ll b/llvm/test/CodeGen/X86/fast-isel-i1.ll index f4c658b731dad2..24b6bffc1a3d5a 100644 --- a/llvm/test/CodeGen/X86/fast-isel-i1.ll +++ b/llvm/test/CodeGen/X86/fast-isel-i1.ll @@ -25,7 +25,7 @@ define void @test2(ptr %a) nounwind { entry: ; clang uses i8 constants for booleans, so we test with an i8 1. ; CHECK-LABEL: test2: -; CHECK: movzbl {{.*}} %eax +; CHECK: movb {{.*}} %al ; CHECK-NEXT: xorb $1, %al ; CHECK-NEXT: testb $1 %tmp = load i8, ptr %a, align 1 diff --git a/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll b/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll index 7dae049d8e0235..9a83db52011a9b 100644 --- a/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll +++ b/llvm/test/CodeGen/X86/fast-isel-sext-zext.ll @@ -5,7 +5,7 @@ define i8 @test1(i8 %x) nounwind { ; X32-LABEL: test1: ; X32: ## %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: negb %al ; X32-NEXT: retl @@ -87,7 +87,7 @@ define i32 @test4(i32 %x) nounwind { define i8 @test5(i8 %x) nounwind { ; X32-LABEL: test5: ; X32: ## %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/fixup-bw-copy.ll b/llvm/test/CodeGen/X86/fixup-bw-copy.ll index 73907d336b194b..f73b0c840c1f70 100644 --- a/llvm/test/CodeGen/X86/fixup-bw-copy.ll +++ b/llvm/test/CodeGen/X86/fixup-bw-copy.ll @@ -13,15 +13,10 @@ define i8 @test_movb(i8 %a0) nounwind { ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq ; -; BWON32-LABEL: test_movb: -; BWON32: # %bb.0: -; BWON32-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; BWON32-NEXT: retl -; -; BWOFF32-LABEL: test_movb: -; BWOFF32: # %bb.0: -; BWOFF32-NEXT: movb {{[0-9]+}}(%esp), %al -; BWOFF32-NEXT: retl +; X32-LABEL: test_movb: +; X32: # %bb.0: +; X32-NEXT: movb {{[0-9]+}}(%esp), %al +; X32-NEXT: retl ret i8 %a0 } diff --git a/llvm/test/CodeGen/X86/fixup-bw-inst.ll b/llvm/test/CodeGen/X86/fixup-bw-inst.ll index 6c371e22b4e6e5..76c089612b85ab 100644 --- a/llvm/test/CodeGen/X86/fixup-bw-inst.ll +++ b/llvm/test/CodeGen/X86/fixup-bw-inst.ll @@ -96,11 +96,11 @@ a4: ; preds = %3, %.lr.ph ret void } -; This test contains nothing but a simple byte load and store. -; movb encodes smaller, but we use movzbl for the load for better perf. +; This test contains nothing but a simple byte load and store. Since +; movb encodes smaller, we do not want to use movzbl unless in a tight loop. +; So this test checks that movb is used. ; CHECK-LABEL: foo3: -; BWON: movzbl -; BWOFF: movb +; CHECK: movb ; CHECK: movb define void @foo3(ptr%dst, ptr%src) { %t0 = load i8, ptr%src, align 1 diff --git a/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll b/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll index 62abc4e035f4ac..a0fb498ad4fa05 100644 --- a/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll +++ b/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll @@ -5,7 +5,7 @@ define i8 @t1(ptr %X, i64 %i) { ; CHECK-LABEL: t1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andq $-255, %rsi -; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax +; CHECK-NEXT: movb (%rdi,%rsi,4), %al ; CHECK-NEXT: retq entry: @@ -20,7 +20,7 @@ define i8 @t2(ptr %X, i64 %i) { ; CHECK-LABEL: t2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andq $-14, %rsi -; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax +; CHECK-NEXT: movb (%rdi,%rsi,4), %al ; CHECK-NEXT: retq entry: @@ -35,7 +35,7 @@ define i8 @t3(ptr %X, i64 %i) { ; CHECK-LABEL: t3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movzbl (%rdi,%rax,4), %eax +; CHECK-NEXT: movb (%rdi,%rax,4), %al ; CHECK-NEXT: retq entry: @@ -50,7 +50,7 @@ define i8 @t4(ptr %X, i64 %i) { ; CHECK-LABEL: t4: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl $-2, %esi -; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax +; CHECK-NEXT: movb (%rdi,%rsi,4), %al ; CHECK-NEXT: retq entry: @@ -65,7 +65,7 @@ define i8 @t5(ptr %X, i64 %i) { ; CHECK-LABEL: t5: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: andl $-250002, %esi # imm = 0xFFFC2F6E -; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax +; CHECK-NEXT: movb (%rdi,%rsi,4), %al ; CHECK-NEXT: retq entry: @@ -81,7 +81,7 @@ define i8 @t6(ptr %X, i32 %i) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: # kill: def $esi killed $esi def $rsi ; CHECK-NEXT: andl $15, %esi -; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax +; CHECK-NEXT: movb (%rdi,%rsi,4), %al ; CHECK-NEXT: retq entry: %tmp2 = shl i32 %i, 2 diff --git a/llvm/test/CodeGen/X86/fold-and-shift.ll b/llvm/test/CodeGen/X86/fold-and-shift.ll index 1318de65df3022..8e1204a6292aa5 100644 --- a/llvm/test/CodeGen/X86/fold-and-shift.ll +++ b/llvm/test/CodeGen/X86/fold-and-shift.ll @@ -94,7 +94,7 @@ define i8 @t5(ptr %X, i32 %i) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: andl $-14, %ecx -; CHECK-NEXT: movzbl (%eax,%ecx,4), %eax +; CHECK-NEXT: movb (%eax,%ecx,4), %al ; CHECK-NEXT: retl entry: @@ -111,7 +111,7 @@ define i8 @t6(ptr %X, i32 %i) { ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl $-255, %ecx ; CHECK-NEXT: andl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movzbl (%eax,%ecx,4), %eax +; CHECK-NEXT: movb (%eax,%ecx,4), %al ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index f4689b2ab6bb94..378a32ea61f7cd 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -935,7 +935,7 @@ define i8 @f20s8(double %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -1207,7 +1207,7 @@ define i8 @f20u8(double %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll index 25a946465ff3fa..9646e7db840c01 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll @@ -67,7 +67,7 @@ define i1 @fptosi_f32toi1(float %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -114,7 +114,7 @@ define i8 @fptosi_f32toi8(float %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -339,7 +339,7 @@ define i1 @fptoui_f32toi1(float %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -386,7 +386,7 @@ define i8 @fptoui_f32toi8(float %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -754,7 +754,7 @@ define i8 @fptosi_f64toi8(double %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -979,7 +979,7 @@ define i1 @fptoui_f64toi1(double %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -1026,7 +1026,7 @@ define i8 @fptoui_f64toi8(double %x) #0 { ; X87-NEXT: fldcw {{[0-9]+}}(%esp) ; X87-NEXT: fistps {{[0-9]+}}(%esp) ; X87-NEXT: fldcw {{[0-9]+}}(%esp) -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: addl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll index 18b6b4ad2055dd..7617aee5e5fbfd 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp-fp16.ll @@ -43,7 +43,7 @@ define half @sitofp_i1tof16(i1 %x) #0 { ; ; X86-LABEL: sitofp_i1tof16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: negb %al ; X86-NEXT: movsbl %al, %eax @@ -231,7 +231,7 @@ define half @uitofp_i1tof16(i1 %x) #0 { ; ; X86-LABEL: uitofp_i1tof16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll index 4933a870ddd872..738fec62e5f5c3 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll @@ -34,7 +34,7 @@ define float @sitofp_i1tof32(i1 %x) #0 { ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: movb {{[0-9]+}}(%esp), %al ; SSE-X86-NEXT: andb $1, %al ; SSE-X86-NEXT: negb %al ; SSE-X86-NEXT: movsbl %al, %eax @@ -58,7 +58,7 @@ define float @sitofp_i1tof32(i1 %x) #0 { ; AVX-X86: # %bb.0: ; AVX-X86-NEXT: pushl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: movb {{[0-9]+}}(%esp), %al ; AVX-X86-NEXT: andb $1, %al ; AVX-X86-NEXT: negb %al ; AVX-X86-NEXT: movsbl %al, %eax @@ -82,7 +82,7 @@ define float @sitofp_i1tof32(i1 %x) #0 { ; X87: # %bb.0: ; X87-NEXT: pushl %eax ; X87-NEXT: .cfi_def_cfa_offset 8 -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: andb $1, %al ; X87-NEXT: negb %al ; X87-NEXT: movsbl %al, %eax @@ -313,7 +313,7 @@ define float @uitofp_i1tof32(i1 %x) #0 { ; SSE-X86: # %bb.0: ; SSE-X86-NEXT: pushl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 8 -; SSE-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: movb {{[0-9]+}}(%esp), %al ; SSE-X86-NEXT: andb $1, %al ; SSE-X86-NEXT: movzbl %al, %eax ; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 @@ -334,7 +334,7 @@ define float @uitofp_i1tof32(i1 %x) #0 { ; AVX-X86: # %bb.0: ; AVX-X86-NEXT: pushl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 8 -; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: movb {{[0-9]+}}(%esp), %al ; AVX-X86-NEXT: andb $1, %al ; AVX-X86-NEXT: movzbl %al, %eax ; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 @@ -355,7 +355,7 @@ define float @uitofp_i1tof32(i1 %x) #0 { ; X87: # %bb.0: ; X87-NEXT: pushl %eax ; X87-NEXT: .cfi_def_cfa_offset 8 -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: andb $1, %al ; X87-NEXT: movzbl %al, %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) @@ -965,7 +965,7 @@ define double @uitofp_i1tof64(i1 %x) #0 { ; SSE-X86-NEXT: .cfi_def_cfa_register %ebp ; SSE-X86-NEXT: andl $-8, %esp ; SSE-X86-NEXT: subl $8, %esp -; SSE-X86-NEXT: movzbl 8(%ebp), %eax +; SSE-X86-NEXT: movb 8(%ebp), %al ; SSE-X86-NEXT: andb $1, %al ; SSE-X86-NEXT: movzbl %al, %eax ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 @@ -992,7 +992,7 @@ define double @uitofp_i1tof64(i1 %x) #0 { ; AVX-X86-NEXT: .cfi_def_cfa_register %ebp ; AVX-X86-NEXT: andl $-8, %esp ; AVX-X86-NEXT: subl $8, %esp -; AVX-X86-NEXT: movzbl 8(%ebp), %eax +; AVX-X86-NEXT: movb 8(%ebp), %al ; AVX-X86-NEXT: andb $1, %al ; AVX-X86-NEXT: movzbl %al, %eax ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 @@ -1014,7 +1014,7 @@ define double @uitofp_i1tof64(i1 %x) #0 { ; X87: # %bb.0: ; X87-NEXT: pushl %eax ; X87-NEXT: .cfi_def_cfa_offset 8 -; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movb {{[0-9]+}}(%esp), %al ; X87-NEXT: andb $1, %al ; X87-NEXT: movzbl %al, %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll index b9b1ae60d479ed..818ca08ae2d237 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -242,7 +242,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 { ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistps {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -258,7 +258,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 { ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-NEXT: fistps -{{[0-9]+}}(%rsp) ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al ; X64-NEXT: retq %result = call i1 @llvm.experimental.constrained.fptosi.i1.f80(x86_fp80 %x, metadata !"fpexcept.strict") #0 @@ -279,7 +279,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 { ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistps {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -295,7 +295,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 { ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-NEXT: fistps -{{[0-9]+}}(%rsp) ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al ; X64-NEXT: retq %result = call i8 @llvm.experimental.constrained.fptosi.i8.f80(x86_fp80 %x, metadata !"fpexcept.strict") #0 @@ -435,7 +435,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 { ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistps {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -451,7 +451,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 { ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-NEXT: fistps -{{[0-9]+}}(%rsp) ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al ; X64-NEXT: retq %result = call i1 @llvm.experimental.constrained.fptoui.i1.f80(x86_fp80 %x, metadata !"fpexcept.strict") #0 @@ -472,7 +472,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 { ; X86-NEXT: fldcw {{[0-9]+}}(%esp) ; X86-NEXT: fistps {{[0-9]+}}(%esp) ; X86-NEXT: fldcw {{[0-9]+}}(%esp) -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -488,7 +488,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 { ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) ; X64-NEXT: fistps -{{[0-9]+}}(%rsp) ; X64-NEXT: fldcw -{{[0-9]+}}(%rsp) -; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: movb -{{[0-9]+}}(%rsp), %al ; X64-NEXT: retq %result = call i8 @llvm.experimental.constrained.fptoui.i8.f80(x86_fp80 %x, metadata !"fpexcept.strict") #0 @@ -655,7 +655,7 @@ define x86_fp80 @sint1_to_fp80(i1 %x) #0 { ; X86: # %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: negb %al ; X86-NEXT: movsbl %al, %eax @@ -781,7 +781,7 @@ define x86_fp80 @uint1_to_fp80(i1 %x) #0 { ; X86: # %bb.0: ; X86-NEXT: pushl %eax ; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $1, %al ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) diff --git a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll index 5a60a9e00aa3f6..8ace8369873199 100644 --- a/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/X86/fptosi-sat-scalar.ll @@ -42,7 +42,7 @@ define i1 @test_signed_i1_f32(float %f) nounwind { ; X86-X87-NEXT: movb $-1, %dl ; X86-X87-NEXT: jb .LBB0_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB0_2: ; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) @@ -115,7 +115,7 @@ define i8 @test_signed_i8_f32(float %f) nounwind { ; X86-X87-NEXT: movb $-128, %dl ; X86-X87-NEXT: jb .LBB1_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB1_2: ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) @@ -1062,7 +1062,7 @@ define i1 @test_signed_i1_f64(double %f) nounwind { ; X86-X87-NEXT: movb $-1, %dl ; X86-X87-NEXT: jb .LBB10_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB10_2: ; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) @@ -1135,7 +1135,7 @@ define i8 @test_signed_i8_f64(double %f) nounwind { ; X86-X87-NEXT: movb $-128, %dl ; X86-X87-NEXT: jb .LBB11_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB11_2: ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) @@ -2079,7 +2079,7 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; X86-X87-NEXT: movb $-1, %dl ; X86-X87-NEXT: jb .LBB20_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB20_2: ; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) @@ -2176,7 +2176,7 @@ define i8 @test_signed_i8_f16(half %f) nounwind { ; X86-X87-NEXT: movb $-128, %dl ; X86-X87-NEXT: jb .LBB21_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB21_2: ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) @@ -3274,7 +3274,7 @@ define i1 @test_signed_i1_f80(x86_fp80 %f) nounwind { ; X86-X87-NEXT: movb $-1, %dl ; X86-X87-NEXT: jb .LBB30_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB30_2: ; X86-X87-NEXT: fldz ; X86-X87-NEXT: fxch %st(1) @@ -3387,7 +3387,7 @@ define i8 @test_signed_i8_f80(x86_fp80 %f) nounwind { ; X86-X87-NEXT: movb $-128, %dl ; X86-X87-NEXT: jb .LBB31_2 ; X86-X87-NEXT: # %bb.1: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-X87-NEXT: .LBB31_2: ; X86-X87-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}} ; X86-X87-NEXT: fxch %st(1) diff --git a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll index 01426b1ac91c24..5fbf26c9d166ab 100644 --- a/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/X86/fptoui-sat-scalar.ll @@ -39,7 +39,7 @@ define i1 @test_unsigned_i1_f32(float %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB0_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB0_3 ; X86-X87-NEXT: .LBB0_1: ; X86-X87-NEXT: xorl %ecx, %ecx @@ -102,7 +102,7 @@ define i8 @test_unsigned_i8_f32(float %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB1_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB1_3 ; X86-X87-NEXT: .LBB1_1: ; X86-X87-NEXT: xorl %ecx, %ecx @@ -974,7 +974,7 @@ define i1 @test_unsigned_i1_f64(double %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB10_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB10_3 ; X86-X87-NEXT: .LBB10_1: ; X86-X87-NEXT: xorl %ecx, %ecx @@ -1037,7 +1037,7 @@ define i8 @test_unsigned_i8_f64(double %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB11_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB11_3 ; X86-X87-NEXT: .LBB11_1: ; X86-X87-NEXT: xorl %ecx, %ecx @@ -1900,7 +1900,7 @@ define i1 @test_unsigned_i1_f16(half %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB20_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB20_3 ; X86-X87-NEXT: .LBB20_1: ; X86-X87-NEXT: xorl %ecx, %ecx @@ -1982,7 +1982,7 @@ define i8 @test_unsigned_i8_f16(half %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB21_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB21_3 ; X86-X87-NEXT: .LBB21_1: ; X86-X87-NEXT: xorl %ecx, %ecx @@ -2995,7 +2995,7 @@ define i1 @test_unsigned_i1_f80(x86_fp80 %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB30_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB30_3 ; X86-X87-NEXT: .LBB30_1: ; X86-X87-NEXT: xorl %ecx, %ecx @@ -3092,7 +3092,7 @@ define i8 @test_unsigned_i8_f80(x86_fp80 %f) nounwind { ; X86-X87-NEXT: sahf ; X86-X87-NEXT: jb .LBB31_1 ; X86-X87-NEXT: # %bb.2: -; X86-X87-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-X87-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-X87-NEXT: jmp .LBB31_3 ; X86-X87-NEXT: .LBB31_1: ; X86-X87-NEXT: xorl %ecx, %ecx diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll index eaf794c7876883..5aaced853153f5 100644 --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ -17,7 +17,7 @@ declare i128 @llvm.fshl.i128(i128, i128, i128) nounwind readnone define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind { ; X86-LABEL: var_shift_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shll $8, %eax @@ -48,14 +48,14 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind { ; X86-FAST: # %bb.0: ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-FAST-NEXT: andb $15, %cl ; X86-FAST-NEXT: shldw %cl, %dx, %ax ; X86-FAST-NEXT: retl ; ; X86-SLOW-LABEL: var_shift_i16: ; X86-SLOW: # %bb.0: -; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: shll $16, %eax @@ -95,7 +95,7 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind { define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind { ; X86-FAST-LABEL: var_shift_i32: ; X86-FAST: # %bb.0: -; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-FAST-NEXT: shldl %cl, %edx, %eax @@ -104,7 +104,7 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind { ; X86-SLOW-LABEL: var_shift_i32: ; X86-SLOW: # %bb.0: ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: shll %cl, %edx ; X86-SLOW-NEXT: notb %cl @@ -446,8 +446,8 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { define i8 @const_shift_i8(i8 %x, i8 %y) nounwind { ; X86-LABEL: const_shift_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shrb %cl ; X86-NEXT: shlb $7, %al ; X86-NEXT: orb %cl, %al @@ -588,12 +588,12 @@ define i8 @combine_fshl_load_i8(ptr %p) nounwind { ; X86-LABEL: combine_fshl_load_i8: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl 1(%eax), %eax +; X86-NEXT: movb 1(%eax), %al ; X86-NEXT: retl ; ; X64-LABEL: combine_fshl_load_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl 1(%rdi), %eax +; X64-NEXT: movb 1(%rdi), %al ; X64-NEXT: retq %p1 = getelementptr i8, ptr %p, i32 1 %ld0 = load i8, ptr%p diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll index eb1f0402c6f473..b3d13a6966ba58 100644 --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -17,7 +17,7 @@ declare i128 @llvm.fshr.i128(i128, i128, i128) nounwind readnone define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind { ; X86-LABEL: var_shift_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shll $8, %eax @@ -47,14 +47,14 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind { ; X86-FAST: # %bb.0: ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-FAST-NEXT: andb $15, %cl ; X86-FAST-NEXT: shrdw %cl, %dx, %ax ; X86-FAST-NEXT: retl ; ; X86-SLOW-LABEL: var_shift_i16: ; X86-SLOW: # %bb.0: -; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-NEXT: shll $16, %eax @@ -92,7 +92,7 @@ define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind { define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind { ; X86-FAST-LABEL: var_shift_i32: ; X86-FAST: # %bb.0: -; X86-FAST-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-FAST-NEXT: shrdl %cl, %edx, %eax @@ -101,7 +101,7 @@ define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind { ; X86-SLOW-LABEL: var_shift_i32: ; X86-SLOW: # %bb.0: ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: shrl %cl, %edx ; X86-SLOW-NEXT: notb %cl @@ -436,8 +436,8 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { define i8 @const_shift_i8(i8 %x, i8 %y) nounwind { ; X86-LABEL: const_shift_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shrb $7, %cl ; X86-NEXT: addb %al, %al ; X86-NEXT: orb %cl, %al @@ -577,12 +577,12 @@ define i8 @combine_fshr_load_i8(ptr %p) nounwind { ; X86-LABEL: combine_fshr_load_i8: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: retl ; ; X64-LABEL: combine_fshr_load_i8: ; X64: # %bb.0: -; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: movb (%rdi), %al ; X64-NEXT: retq %p1 = getelementptr i8, ptr %p, i32 1 %ld0 = load i8, ptr%p diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll index 79870de6a25897..ef287b959427be 100644 --- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll @@ -19,7 +19,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define i8 @rotl_i8_const_shift(i8 %x) nounwind { ; X86-SSE2-LABEL: rotl_i8_const_shift: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-SSE2-NEXT: rolb $3, %al ; X86-SSE2-NEXT: retl ; @@ -36,7 +36,7 @@ define i8 @rotl_i8_const_shift(i8 %x) nounwind { define i8 @rotl_i8_const_shift1(i8 %x) nounwind { ; X86-SSE2-LABEL: rotl_i8_const_shift1: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-SSE2-NEXT: rolb %al ; X86-SSE2-NEXT: retl ; @@ -53,7 +53,7 @@ define i8 @rotl_i8_const_shift1(i8 %x) nounwind { define i8 @rotl_i8_const_shift7(i8 %x) nounwind { ; X86-SSE2-LABEL: rotl_i8_const_shift7: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-SSE2-NEXT: rorb %al ; X86-SSE2-NEXT: retl ; @@ -89,7 +89,7 @@ define i64 @rotl_i64_const_shift(i64 %x) nounwind { define i16 @rotl_i16(i16 %x, i16 %z) nounwind { ; X86-SSE2-LABEL: rotl_i16: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: rolw %cl, %ax ; X86-SSE2-NEXT: retl @@ -109,7 +109,7 @@ define i16 @rotl_i16(i16 %x, i16 %z) nounwind { define i32 @rotl_i32(i32 %x, i32 %z) nounwind { ; X86-SSE2-LABEL: rotl_i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: roll %cl, %eax ; X86-SSE2-NEXT: retl @@ -187,7 +187,7 @@ define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) nounwind { define i8 @rotr_i8_const_shift(i8 %x) nounwind { ; X86-SSE2-LABEL: rotr_i8_const_shift: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-SSE2-NEXT: rorb $3, %al ; X86-SSE2-NEXT: retl ; @@ -204,7 +204,7 @@ define i8 @rotr_i8_const_shift(i8 %x) nounwind { define i8 @rotr_i8_const_shift1(i8 %x) nounwind { ; X86-SSE2-LABEL: rotr_i8_const_shift1: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-SSE2-NEXT: rorb %al ; X86-SSE2-NEXT: retl ; @@ -221,7 +221,7 @@ define i8 @rotr_i8_const_shift1(i8 %x) nounwind { define i8 @rotr_i8_const_shift7(i8 %x) nounwind { ; X86-SSE2-LABEL: rotr_i8_const_shift7: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-SSE2-NEXT: rolb %al ; X86-SSE2-NEXT: retl ; @@ -256,7 +256,7 @@ define i32 @rotr_i32_const_shift(i32 %x) nounwind { define i16 @rotr_i16(i16 %x, i16 %z) nounwind { ; X86-SSE2-LABEL: rotr_i16: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: rorw %cl, %ax ; X86-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll index 404587437f5f3e..a2dab38acb9a41 100644 --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -20,7 +20,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind { ; X86-SSE2-LABEL: fshl_i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shldl %cl, %edx, %eax @@ -282,7 +282,7 @@ define i8 @fshl_i8_const_fold() nounwind { define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind { ; X86-SSE2-LABEL: fshr_i32: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax @@ -419,7 +419,7 @@ define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind { define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshl_i32_undef0: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shldl %cl, %eax, %eax ; X86-SSE2-NEXT: retl @@ -475,7 +475,7 @@ define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind { define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshl_i32_undef1: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shldl %cl, %eax, %eax ; X86-SSE2-NEXT: retl @@ -495,7 +495,7 @@ define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshl_i32_undef1_msk: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: andb $7, %cl ; X86-SSE2-NEXT: shll %cl, %eax ; X86-SSE2-NEXT: retl @@ -549,7 +549,7 @@ define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind { define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshr_i32_undef0: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shrdl %cl, %eax, %eax ; X86-SSE2-NEXT: retl @@ -569,7 +569,7 @@ define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshr_i32_undef0_msk: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: andb $7, %cl ; X86-SSE2-NEXT: shrl %cl, %eax ; X86-SSE2-NEXT: retl @@ -606,7 +606,7 @@ define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind { define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshr_i32_undef1: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shrdl %cl, %eax, %eax ; X86-SSE2-NEXT: retl @@ -681,7 +681,7 @@ define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind { define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshl_i32_zero0: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: xorl %eax, %eax ; X86-SSE2-NEXT: shldl %cl, %edx, %eax @@ -717,7 +717,7 @@ define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind { define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshl_i32_zero1: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: xorl %edx, %edx ; X86-SSE2-NEXT: shldl %cl, %edx, %eax @@ -754,7 +754,7 @@ define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind { define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshr_i32_zero0: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: xorl %edx, %edx ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax @@ -791,7 +791,7 @@ define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind { define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind { ; X86-SSE2-LABEL: fshr_i32_zero1: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: xorl %eax, %eax ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax @@ -1047,7 +1047,7 @@ define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl %edx, %esi ; X86-SSE2-NEXT: shll %cl, %esi @@ -1075,7 +1075,7 @@ define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_shl_rotl: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: shll %cl, %edx ; X86-SSE2-NEXT: roll %cl, %eax @@ -1102,7 +1102,7 @@ define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl %edx, %esi ; X86-SSE2-NEXT: shll %cl, %esi @@ -1130,7 +1130,7 @@ define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_shl_rotl_commute: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: shll %cl, %edx ; X86-SSE2-NEXT: roll %cl, %eax @@ -1157,7 +1157,7 @@ define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl %edx, %esi ; X86-SSE2-NEXT: shrl %cl, %esi @@ -1185,7 +1185,7 @@ define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_lshr_rotr: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: shrl %cl, %edx ; X86-SSE2-NEXT: rorl %cl, %eax @@ -1212,7 +1212,7 @@ define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: pushl %esi ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl %edx, %esi ; X86-SSE2-NEXT: shrl %cl, %esi @@ -1240,7 +1240,7 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_lshr_rotr_commute: ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: shrl %cl, %edx ; X86-SSE2-NEXT: rorl %cl, %eax @@ -1265,7 +1265,7 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind { define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_shl_fshl_simplify: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shldl %cl, %edx, %eax @@ -1287,7 +1287,7 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind { define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind { ; X86-SSE2-LABEL: or_lshr_fshr_simplify: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax diff --git a/llvm/test/CodeGen/X86/gpr-to-mask.ll b/llvm/test/CodeGen/X86/gpr-to-mask.ll index 3493210cff3363..248aa3a77d2b23 100644 --- a/llvm/test/CodeGen/X86/gpr-to-mask.ll +++ b/llvm/test/CodeGen/X86/gpr-to-mask.ll @@ -105,11 +105,11 @@ define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB2_2 ; X86-64-NEXT: # %bb.1: # %if -; X86-64-NEXT: movzbl (%rdx), %eax +; X86-64-NEXT: movb (%rdx), %al ; X86-64-NEXT: addb (%rcx), %al ; X86-64-NEXT: jmp .LBB2_3 ; X86-64-NEXT: .LBB2_2: # %else -; X86-64-NEXT: movzbl (%rcx), %eax +; X86-64-NEXT: movb (%rcx), %al ; X86-64-NEXT: .LBB2_3: # %exit ; X86-64-NEXT: kmovd %eax, %k1 ; X86-64-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} @@ -126,11 +126,11 @@ define void @test_load_add(i1 %cond, ptr %fptr, ptr %iptr1, ptr %iptr2, float %f ; X86-32-NEXT: je .LBB2_2 ; X86-32-NEXT: # %bb.1: # %if ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-32-NEXT: movzbl (%edx), %edx +; X86-32-NEXT: movb (%edx), %dl ; X86-32-NEXT: addb (%ecx), %dl ; X86-32-NEXT: jmp .LBB2_3 ; X86-32-NEXT: .LBB2_2: # %else -; X86-32-NEXT: movzbl (%ecx), %edx +; X86-32-NEXT: movb (%ecx), %dl ; X86-32-NEXT: .LBB2_3: # %exit ; X86-32-NEXT: kmovd %edx, %k1 ; X86-32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} @@ -212,10 +212,10 @@ define void @test_loadi1_storei1(i1 %cond, ptr %iptr1, ptr %iptr2, ptr %iptr3) ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB4_2 ; X86-64-NEXT: # %bb.1: # %if -; X86-64-NEXT: movzbl (%rsi), %eax +; X86-64-NEXT: movb (%rsi), %al ; X86-64-NEXT: jmp .LBB4_3 ; X86-64-NEXT: .LBB4_2: # %else -; X86-64-NEXT: movzbl (%rdx), %eax +; X86-64-NEXT: movb (%rdx), %al ; X86-64-NEXT: .LBB4_3: # %exit ; X86-64-NEXT: andb $1, %al ; X86-64-NEXT: movb %al, (%rcx) @@ -232,7 +232,7 @@ define void @test_loadi1_storei1(i1 %cond, ptr %iptr1, ptr %iptr2, ptr %iptr3) ; X86-32-NEXT: .LBB4_2: # %else ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-32-NEXT: .LBB4_3: # %exit -; X86-32-NEXT: movzbl (%ecx), %ecx +; X86-32-NEXT: movb (%ecx), %cl ; X86-32-NEXT: andb $1, %cl ; X86-32-NEXT: movb %cl, (%eax) ; X86-32-NEXT: retl @@ -320,11 +320,11 @@ define void @test_shr1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x ; X86-64-NEXT: testb $1, %dil ; X86-64-NEXT: je .LBB6_2 ; X86-64-NEXT: # %bb.1: # %if -; X86-64-NEXT: movzbl (%rsi), %eax +; X86-64-NEXT: movb (%rsi), %al ; X86-64-NEXT: shrb %al ; X86-64-NEXT: jmp .LBB6_3 ; X86-64-NEXT: .LBB6_2: # %else -; X86-64-NEXT: movzbl (%rdx), %eax +; X86-64-NEXT: movb (%rdx), %al ; X86-64-NEXT: .LBB6_3: # %exit ; X86-64-NEXT: kmovd %eax, %k1 ; X86-64-NEXT: vmovaps %zmm0, %zmm1 {%k1} @@ -341,12 +341,12 @@ define void @test_shr1(i1 %cond, ptr %ptr1, ptr %ptr2, <8 x float> %fvec1, <8 x ; X86-32-NEXT: je .LBB6_2 ; X86-32-NEXT: # %bb.1: # %if ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-32-NEXT: movzbl (%ecx), %ecx +; X86-32-NEXT: movb (%ecx), %cl ; X86-32-NEXT: shrb %cl ; X86-32-NEXT: jmp .LBB6_3 ; X86-32-NEXT: .LBB6_2: # %else ; X86-32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-32-NEXT: movzbl (%ecx), %ecx +; X86-32-NEXT: movb (%ecx), %cl ; X86-32-NEXT: .LBB6_3: # %exit ; X86-32-NEXT: kmovd %ecx, %k1 ; X86-32-NEXT: vmovaps %zmm0, %zmm1 {%k1} diff --git a/llvm/test/CodeGen/X86/h-register-addressing-32.ll b/llvm/test/CodeGen/X86/h-register-addressing-32.ll index eee1fd6d522ce4..f485387994ce6e 100644 --- a/llvm/test/CodeGen/X86/h-register-addressing-32.ll +++ b/llvm/test/CodeGen/X86/h-register-addressing-32.ll @@ -46,7 +46,7 @@ define i8 @foo1(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { ; CHECK-LABEL: foo1: ; CHECK: # %bb.0: ; CHECK-NEXT: movzbl %dh, %ecx -; CHECK-NEXT: movzbl (%eax,%ecx), %eax +; CHECK-NEXT: movb (%eax,%ecx), %al ; CHECK-NEXT: retl %t0 = lshr i32 %x, 8 %t1 = and i32 %t0, 255 @@ -59,7 +59,7 @@ define i8 @bar8(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { ; CHECK-LABEL: bar8: ; CHECK: # %bb.0: ; CHECK-NEXT: movzbl %dh, %ecx -; CHECK-NEXT: movzbl (%eax,%ecx,8), %eax +; CHECK-NEXT: movb (%eax,%ecx,8), %al ; CHECK-NEXT: retl %t0 = lshr i32 %x, 5 %t1 = and i32 %t0, 2040 @@ -72,7 +72,7 @@ define i8 @bar4(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { ; CHECK-LABEL: bar4: ; CHECK: # %bb.0: ; CHECK-NEXT: movzbl %dh, %ecx -; CHECK-NEXT: movzbl (%eax,%ecx,4), %eax +; CHECK-NEXT: movb (%eax,%ecx,4), %al ; CHECK-NEXT: retl %t0 = lshr i32 %x, 6 %t1 = and i32 %t0, 1020 @@ -85,7 +85,7 @@ define i8 @bar2(ptr nocapture inreg %p, i32 inreg %x) nounwind readonly { ; CHECK-LABEL: bar2: ; CHECK: # %bb.0: ; CHECK-NEXT: movzbl %dh, %ecx -; CHECK-NEXT: movzbl (%eax,%ecx,2), %eax +; CHECK-NEXT: movb (%eax,%ecx,2), %al ; CHECK-NEXT: retl %t0 = lshr i32 %x, 7 %t1 = and i32 %t0, 510 diff --git a/llvm/test/CodeGen/X86/h-register-addressing-64.ll b/llvm/test/CodeGen/X86/h-register-addressing-64.ll index 5136cc84c81f66..cfe5d26f2d606b 100644 --- a/llvm/test/CodeGen/X86/h-register-addressing-64.ll +++ b/llvm/test/CodeGen/X86/h-register-addressing-64.ll @@ -50,7 +50,7 @@ define i8 @foo1(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly { ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movzbl %ah, %eax -; CHECK-NEXT: movzbl (%rdi,%rax), %eax +; CHECK-NEXT: movb (%rdi,%rax), %al ; CHECK-NEXT: retq %t0 = lshr i64 %x, 8 %t1 = and i64 %t0, 255 @@ -64,7 +64,7 @@ define i8 @bar8(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly { ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movzbl %ah, %eax -; CHECK-NEXT: movzbl (%rdi,%rax,8), %eax +; CHECK-NEXT: movb (%rdi,%rax,8), %al ; CHECK-NEXT: retq %t0 = lshr i64 %x, 5 %t1 = and i64 %t0, 2040 @@ -78,7 +78,7 @@ define i8 @bar4(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly { ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movzbl %ah, %eax -; CHECK-NEXT: movzbl (%rdi,%rax,4), %eax +; CHECK-NEXT: movb (%rdi,%rax,4), %al ; CHECK-NEXT: retq %t0 = lshr i64 %x, 6 %t1 = and i64 %t0, 1020 @@ -92,7 +92,7 @@ define i8 @bar2(ptr nocapture inreg %p, i64 inreg %x) nounwind readonly { ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movzbl %ah, %eax -; CHECK-NEXT: movzbl (%rdi,%rax,2), %eax +; CHECK-NEXT: movb (%rdi,%rax,2), %al ; CHECK-NEXT: retq %t0 = lshr i64 %x, 7 %t1 = and i64 %t0, 510 diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll index 7897a20403d06f..8ebc2640a8d69e 100644 --- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -22,8 +22,8 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_eq: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: testb $-128, %al ; X86-NEXT: sete %al @@ -46,8 +46,8 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_lowestbit_eq: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: testb $1, %al ; X86-NEXT: sete %al @@ -70,8 +70,8 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_bitsinmiddle_eq: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: testb $24, %al ; X86-NEXT: sete %al @@ -96,7 +96,7 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; X86-BMI1-LABEL: scalar_i16_signbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000 @@ -105,7 +105,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i16_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI2-NEXT: sete %al @@ -135,7 +135,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { ; X86-BMI1-LABEL: scalar_i16_lowestbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: testb $1, %al @@ -144,7 +144,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i16_lowestbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testb $1, %al ; X86-BMI2-NEXT: sete %al @@ -174,7 +174,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 @@ -183,7 +183,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI2-NEXT: sete %al @@ -215,7 +215,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; X86-BMI1-LABEL: scalar_i32_signbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000 @@ -224,7 +224,7 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i32_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al @@ -254,7 +254,7 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { ; X86-BMI1-LABEL: scalar_i32_lowestbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: testb $1, %al @@ -263,7 +263,7 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i32_lowestbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testb $1, %al ; X86-BMI2-NEXT: sete %al @@ -293,7 +293,7 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00 @@ -302,7 +302,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI2-NEXT: sete %al @@ -335,7 +335,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; X86-BMI1-LABEL: scalar_i64_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl %eax, %esi @@ -350,7 +350,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i64_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shldl %cl, %eax, %edx @@ -385,7 +385,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; X86-BMI1-LABEL: scalar_i64_lowestbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: xorl %edx, %edx @@ -397,7 +397,7 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i64_lowestbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: testb $32, %al @@ -431,7 +431,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl %eax, %esi @@ -451,7 +451,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shldl %cl, %eax, %edx @@ -772,8 +772,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_ne: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: shrb $7, %al ; X86-NEXT: retl @@ -820,7 +820,7 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind { define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { ; X86-BMI1-LABEL: scalar_i32_x_is_const2_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %eax ; X86-BMI1-NEXT: shrl %cl, %eax ; X86-BMI1-NEXT: testl %eax, %eax @@ -829,7 +829,7 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i32_x_is_const2_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $1, %ecx ; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax ; X86-BMI2-NEXT: testl %eax, %eax @@ -877,7 +877,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_eq_with_nonzero: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb $-128, %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll index ddd6f002a0992c..63d4e93eeb307f 100644 --- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -22,8 +22,8 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_eq: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: testb $-128, %al ; X86-NEXT: sete %al @@ -66,8 +66,8 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_bitsinmiddle_eq: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: testb $24, %al ; X86-NEXT: sete %al @@ -92,7 +92,7 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; X86-BMI1-LABEL: scalar_i16_signbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shrl %cl, %eax ; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000 @@ -102,7 +102,7 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; X86-BMI2-LABEL: scalar_i16_signbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax ; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI2-NEXT: sete %al @@ -154,7 +154,7 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shrl %cl, %eax ; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 @@ -164,7 +164,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax ; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI2-NEXT: sete %al @@ -198,7 +198,7 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; X86-BMI1-LABEL: scalar_i32_signbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shrl %cl, %eax ; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000 @@ -207,7 +207,7 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i32_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al @@ -257,7 +257,7 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shrl %cl, %eax ; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00 @@ -266,7 +266,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI2-NEXT: sete %al @@ -298,7 +298,7 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; X86-BMI1-LABEL: scalar_i64_signbit_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: shrl %cl, %eax ; X86-BMI1-NEXT: xorl %edx, %edx @@ -310,7 +310,7 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i64_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: testb $32, %al @@ -344,7 +344,7 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; X86-BMI1-LABEL: scalar_i64_lowestbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $1, %eax ; X86-BMI1-NEXT: xorl %esi, %esi ; X86-BMI1-NEXT: xorl %edx, %edx @@ -363,7 +363,7 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; X86-BMI2-LABEL: scalar_i64_lowestbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl $1, %eax ; X86-BMI2-NEXT: xorl %edx, %edx ; X86-BMI2-NEXT: xorl %esi, %esi @@ -394,7 +394,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; X86-BMI1-LABEL: scalar_i64_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI1-NEXT: movl %edx, %esi @@ -414,7 +414,7 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; X86-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: shrdl %cl, %edx, %eax @@ -709,8 +709,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwi define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_ne: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb %cl, %al ; X86-NEXT: shrb $7, %al ; X86-NEXT: retl @@ -737,7 +737,7 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind { ; X86-BMI1-LABEL: scalar_i32_x_is_const_eq: ; X86-BMI1: # %bb.0: -; X86-BMI1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-BMI1-NEXT: movl $-1437226411, %eax # imm = 0xAA55AA55 ; X86-BMI1-NEXT: shll %cl, %eax ; X86-BMI1-NEXT: testb $1, %al @@ -746,7 +746,7 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind { ; ; X86-BMI2-LABEL: scalar_i32_x_is_const_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $-1437226411, %ecx # imm = 0xAA55AA55 ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax ; X86-BMI2-NEXT: testb $1, %al @@ -803,7 +803,7 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind { define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { ; X86-LABEL: negative_scalar_i8_bitsinmiddle_slt: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb $24, %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al @@ -828,7 +828,7 @@ define i1 @negative_scalar_i8_bitsinmiddle_slt(i8 %x, i8 %y) nounwind { define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8_signbit_eq_with_nonzero: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movb $-128, %al ; X86-NEXT: shlb %cl, %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll index 55c318e87a5a0e..c3cb3b30aa0605 100644 --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -12,7 +12,7 @@ define i8 @test_i8(i8 %a) nounwind { ; X86-LABEL: test_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: sarb $7, %cl ; X86-NEXT: xorb %cl, %al diff --git a/llvm/test/CodeGen/X86/inc-of-add.ll b/llvm/test/CodeGen/X86/inc-of-add.ll index a899660031d452..386e04f421e924 100644 --- a/llvm/test/CodeGen/X86/inc-of-add.ll +++ b/llvm/test/CodeGen/X86/inc-of-add.ll @@ -12,7 +12,7 @@ define i8 @scalar_i8(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: incb %al ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll index 88c5c2730ec9c1..a30cbafbc2085d 100644 --- a/llvm/test/CodeGen/X86/insertelement-var-index.ll +++ b/llvm/test/CodeGen/X86/insertelement-var-index.ll @@ -638,7 +638,7 @@ define <32 x i8> @load_i8_v32i8_undef(ptr %p, i32 %y) nounwind { ; SSE-LABEL: load_i8_v32i8_undef: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: andl $31, %esi ; SSE-NEXT: movb %al, -40(%rsp,%rsi) ; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 @@ -865,7 +865,7 @@ define <16 x i8> @arg_i8_v16i8(<16 x i8> %v, i8 %x, i32 %y) nounwind { ; X86AVX2-NEXT: subl $32, %esp ; X86AVX2-NEXT: movl 12(%ebp), %eax ; X86AVX2-NEXT: andl $15, %eax -; X86AVX2-NEXT: movzbl 8(%ebp), %ecx +; X86AVX2-NEXT: movb 8(%ebp), %cl ; X86AVX2-NEXT: vmovaps %xmm0, (%esp) ; X86AVX2-NEXT: movb %cl, (%esp,%eax) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 @@ -1160,7 +1160,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind { ; SSE-LABEL: load_i8_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: andl $15, %esi ; SSE-NEXT: movb %al, -24(%rsp,%rsi) @@ -1170,7 +1170,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind { ; AVX1OR2-LABEL: load_i8_v16i8: ; AVX1OR2: # %bb.0: ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi -; AVX1OR2-NEXT: movzbl (%rdi), %eax +; AVX1OR2-NEXT: movb (%rdi), %al ; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX1OR2-NEXT: andl $15, %esi ; AVX1OR2-NEXT: movb %al, -24(%rsp,%rsi) @@ -1180,7 +1180,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind { ; AVX512F-LABEL: load_i8_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi -; AVX512F-NEXT: movzbl (%rdi), %eax +; AVX512F-NEXT: movb (%rdi), %al ; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AVX512F-NEXT: andl $15, %esi ; AVX512F-NEXT: movb %al, -24(%rsp,%rsi) @@ -1203,7 +1203,7 @@ define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl 12(%ebp), %eax ; X86AVX2-NEXT: andl $15, %eax ; X86AVX2-NEXT: movl 8(%ebp), %ecx -; X86AVX2-NEXT: movzbl (%ecx), %ecx +; X86AVX2-NEXT: movb (%ecx), %cl ; X86AVX2-NEXT: vmovaps %xmm0, (%esp) ; X86AVX2-NEXT: movb %cl, (%esp,%eax) ; X86AVX2-NEXT: vmovaps (%esp), %xmm0 @@ -1572,7 +1572,7 @@ define <32 x i8> @arg_i8_v32i8(<32 x i8> %v, i8 %x, i32 %y) nounwind { ; X86AVX2-NEXT: subl $64, %esp ; X86AVX2-NEXT: movl 12(%ebp), %eax ; X86AVX2-NEXT: andl $31, %eax -; X86AVX2-NEXT: movzbl 8(%ebp), %ecx +; X86AVX2-NEXT: movb 8(%ebp), %cl ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) ; X86AVX2-NEXT: movb %cl, (%esp,%eax) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 @@ -1884,7 +1884,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind { ; SSE-LABEL: load_i8_v32i8: ; SSE: # %bb.0: ; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: andl $31, %esi @@ -1900,7 +1900,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind { ; AVX1OR2-NEXT: andq $-32, %rsp ; AVX1OR2-NEXT: subq $64, %rsp ; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi -; AVX1OR2-NEXT: movzbl (%rdi), %eax +; AVX1OR2-NEXT: movb (%rdi), %al ; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) ; AVX1OR2-NEXT: andl $31, %esi ; AVX1OR2-NEXT: movb %al, (%rsp,%rsi) @@ -1916,7 +1916,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind { ; AVX512F-NEXT: andq $-32, %rsp ; AVX512F-NEXT: subq $64, %rsp ; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi -; AVX512F-NEXT: movzbl (%rdi), %eax +; AVX512F-NEXT: movb (%rdi), %al ; AVX512F-NEXT: vmovaps %ymm0, (%rsp) ; AVX512F-NEXT: andl $31, %esi ; AVX512F-NEXT: movb %al, (%rsp,%rsi) @@ -1941,7 +1941,7 @@ define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind { ; X86AVX2-NEXT: movl 12(%ebp), %eax ; X86AVX2-NEXT: andl $31, %eax ; X86AVX2-NEXT: movl 8(%ebp), %ecx -; X86AVX2-NEXT: movzbl (%ecx), %ecx +; X86AVX2-NEXT: movb (%ecx), %cl ; X86AVX2-NEXT: vmovaps %ymm0, (%esp) ; X86AVX2-NEXT: movb %cl, (%esp,%eax) ; X86AVX2-NEXT: vmovaps (%esp), %ymm0 diff --git a/llvm/test/CodeGen/X86/isel-sink2.ll b/llvm/test/CodeGen/X86/isel-sink2.ll index 46ff70a7464347..b82f87a274fe6e 100644 --- a/llvm/test/CodeGen/X86/isel-sink2.ll +++ b/llvm/test/CodeGen/X86/isel-sink2.ll @@ -8,7 +8,7 @@ define i8 @test(ptr%P) nounwind { ; CHECK-NEXT: cmpb $0, 4(%eax) ; CHECK-NEXT: je .LBB0_1 ; CHECK-NEXT: # %bb.2: # %F -; CHECK-NEXT: movzbl 7(%eax), %eax +; CHECK-NEXT: movb 7(%eax), %al ; CHECK-NEXT: retl ; CHECK-NEXT: .LBB0_1: # %TB ; CHECK-NEXT: movb $4, %al diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll index 57643e34135201..a8700ce4f161d4 100644 --- a/llvm/test/CodeGen/X86/legalize-shift-64.ll +++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll @@ -5,7 +5,7 @@ define i64 @test1(i32 %xx, i32 %test) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: andb $7, %cl ; CHECK-NEXT: movl %edx, %eax ; CHECK-NEXT: shll %cl, %eax @@ -26,7 +26,7 @@ define i64 @test2(i64 %xx, i32 %test) nounwind { ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: andb $7, %cl ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: shll %cl, %eax @@ -44,7 +44,7 @@ define i64 @test3(i64 %xx, i32 %test) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: andb $7, %cl ; CHECK-NEXT: shrdl %cl, %edx, %eax ; CHECK-NEXT: shrl %cl, %edx @@ -60,7 +60,7 @@ define i64 @test4(i64 %xx, i32 %test) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: andb $7, %cl ; CHECK-NEXT: shrdl %cl, %edx, %eax ; CHECK-NEXT: sarl %cl, %edx diff --git a/llvm/test/CodeGen/X86/lifetime-alias.ll b/llvm/test/CodeGen/X86/lifetime-alias.ll index 3efaccba1b63d5..c621391fb8c830 100644 --- a/llvm/test/CodeGen/X86/lifetime-alias.ll +++ b/llvm/test/CodeGen/X86/lifetime-alias.ll @@ -55,9 +55,9 @@ define i8 @main() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al ; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al ; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax ; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) @@ -81,7 +81,7 @@ define i8 @main() local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: .LBB0_3: # %_ZNSt3__312basic_stringIcNS_11char_traitsIcEENS_9allocatorIcEEED2Ev.exit50 -; CHECK-NEXT: movzbl 16(%rax), %eax +; CHECK-NEXT: movb 16(%rax), %al ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/load-local-v3i1.ll b/llvm/test/CodeGen/X86/load-local-v3i1.ll index ae3d9cdb326d36..a0483d609e8a6b 100644 --- a/llvm/test/CodeGen/X86/load-local-v3i1.ll +++ b/llvm/test/CodeGen/X86/load-local-v3i1.ll @@ -94,7 +94,7 @@ define void @local_load_v3i1(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr %p ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movq %rdi, %r14 -; CHECK-NEXT: movzbl (%rdx), %eax +; CHECK-NEXT: movb (%rdx), %al ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: shrb %cl ; CHECK-NEXT: andb $1, %cl diff --git a/llvm/test/CodeGen/X86/load-local-v4i5.ll b/llvm/test/CodeGen/X86/load-local-v4i5.ll index 34100bdbc2c4e5..b04373935858a8 100644 --- a/llvm/test/CodeGen/X86/load-local-v4i5.ll +++ b/llvm/test/CodeGen/X86/load-local-v4i5.ll @@ -8,7 +8,7 @@ define void @_start() { ; CHECK: # %bb.0: # %Entry ; CHECK-NEXT: movl __unnamed_1(%rip), %eax ; CHECK-NEXT: movl %eax, -12(%rsp) -; CHECK-NEXT: movzbl -9(%rsp), %ecx +; CHECK-NEXT: movb -9(%rsp), %cl ; CHECK-NEXT: movzbl -10(%rsp), %edx ; CHECK-NEXT: movzbl -11(%rsp), %esi ; CHECK-NEXT: andl $31, %eax diff --git a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll index fa0e6648b87122..98da9b1239c40e 100644 --- a/llvm/test/CodeGen/X86/load-scalar-as-vector.ll +++ b/llvm/test/CodeGen/X86/load-scalar-as-vector.ll @@ -90,7 +90,7 @@ define <2 x i64> @sub_op0_constant(ptr %p) nounwind { define <16 x i8> @sub_op1_constant(ptr %p) nounwind { ; SSE-LABEL: sub_op1_constant: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: addb $-42, %al ; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: movd %eax, %xmm0 @@ -98,7 +98,7 @@ define <16 x i8> @sub_op1_constant(ptr %p) nounwind { ; ; AVX-LABEL: sub_op1_constant: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %eax +; AVX-NEXT: movb (%rdi), %al ; AVX-NEXT: addb $-42, %al ; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vmovd %eax, %xmm0 @@ -210,7 +210,7 @@ define <8 x i16> @xor_op1_constant(ptr %p) nounwind { define <4 x i32> @shl_op0_constant(ptr %p) nounwind { ; SSE-LABEL: shl_op0_constant: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %ecx +; SSE-NEXT: movb (%rdi), %cl ; SSE-NEXT: movl $42, %eax ; SSE-NEXT: shll %cl, %eax ; SSE-NEXT: movd %eax, %xmm0 @@ -218,7 +218,7 @@ define <4 x i32> @shl_op0_constant(ptr %p) nounwind { ; ; AVX-LABEL: shl_op0_constant: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %ecx +; AVX-NEXT: movb (%rdi), %cl ; AVX-NEXT: movl $42, %eax ; AVX-NEXT: shll %cl, %eax ; AVX-NEXT: vmovd %eax, %xmm0 @@ -232,7 +232,7 @@ define <4 x i32> @shl_op0_constant(ptr %p) nounwind { define <16 x i8> @shl_op1_constant(ptr %p) nounwind { ; SSE-LABEL: shl_op1_constant: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: shlb $5, %al ; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: movd %eax, %xmm0 @@ -240,7 +240,7 @@ define <16 x i8> @shl_op1_constant(ptr %p) nounwind { ; ; AVX-LABEL: shl_op1_constant: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %eax +; AVX-NEXT: movb (%rdi), %al ; AVX-NEXT: shlb $5, %al ; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vmovd %eax, %xmm0 @@ -254,7 +254,7 @@ define <16 x i8> @shl_op1_constant(ptr %p) nounwind { define <2 x i64> @lshr_op0_constant(ptr %p) nounwind { ; SSE-LABEL: lshr_op0_constant: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %ecx +; SSE-NEXT: movb (%rdi), %cl ; SSE-NEXT: movl $42, %eax ; SSE-NEXT: shrq %cl, %rax ; SSE-NEXT: movq %rax, %xmm0 @@ -262,7 +262,7 @@ define <2 x i64> @lshr_op0_constant(ptr %p) nounwind { ; ; AVX-LABEL: lshr_op0_constant: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %ecx +; AVX-NEXT: movb (%rdi), %cl ; AVX-NEXT: movl $42, %eax ; AVX-NEXT: shrq %cl, %rax ; AVX-NEXT: vmovq %rax, %xmm0 @@ -296,7 +296,7 @@ define <4 x i32> @lshr_op1_constant(ptr %p) nounwind { define <8 x i16> @ashr_op0_constant(ptr %p) nounwind { ; SSE-LABEL: ashr_op0_constant: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %ecx +; SSE-NEXT: movb (%rdi), %cl ; SSE-NEXT: movl $-42, %eax ; SSE-NEXT: sarl %cl, %eax ; SSE-NEXT: movd %eax, %xmm0 @@ -304,7 +304,7 @@ define <8 x i16> @ashr_op0_constant(ptr %p) nounwind { ; ; AVX-LABEL: ashr_op0_constant: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %ecx +; AVX-NEXT: movb (%rdi), %cl ; AVX-NEXT: movl $-42, %eax ; AVX-NEXT: sarl %cl, %eax ; AVX-NEXT: vmovd %eax, %xmm0 @@ -520,7 +520,7 @@ define <2 x i64> @urem_op0_constant(ptr %p) nounwind { define <16 x i8> @urem_op1_constant(ptr %p) nounwind { ; SSE-LABEL: urem_op1_constant: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: movl %eax, %ecx ; SSE-NEXT: shrb %cl ; SSE-NEXT: movzbl %cl, %ecx @@ -534,7 +534,7 @@ define <16 x i8> @urem_op1_constant(ptr %p) nounwind { ; ; AVX-LABEL: urem_op1_constant: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %eax +; AVX-NEXT: movb (%rdi), %al ; AVX-NEXT: movl %eax, %ecx ; AVX-NEXT: shrb %cl ; AVX-NEXT: movzbl %cl, %ecx diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 3d714f31ba72ed..0d6c6a06e4adf0 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -2519,11 +2519,11 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i ; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 ; KNL_32-NEXT: movw $-3, %ax ; KNL_32-NEXT: kmovw %eax, %k0 -; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_32-NEXT: andl $1, %eax ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kandw %k0, %k1, %k0 -; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_32-NEXT: kshiftrw $14, %k1, %k1 @@ -2531,7 +2531,7 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i ; KNL_32-NEXT: movw $-5, %ax ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kandw %k1, %k0, %k0 -; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_32-NEXT: kshiftrw $13, %k1, %k1 @@ -2576,12 +2576,12 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i ; SKX_32: # %bb.0: ; SKX_32-NEXT: movb $-3, %al ; SKX_32-NEXT: kmovw %eax, %k0 -; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kshiftlb $7, %k1, %k1 ; SKX_32-NEXT: kshiftrb $7, %k1, %k1 ; SKX_32-NEXT: kandw %k0, %k1, %k0 -; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kshiftlb $7, %k1, %k1 ; SKX_32-NEXT: kshiftrb $6, %k1, %k1 @@ -2589,7 +2589,7 @@ define <3 x i32> @test30(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i ; SKX_32-NEXT: movb $-5, %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kandw %k1, %k0, %k0 -; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kshiftlb $7, %k1, %k1 ; SKX_32-NEXT: kshiftrb $5, %k1, %k1 @@ -2642,11 +2642,11 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> ; KNL_32-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 ; KNL_32-NEXT: movw $-3, %ax ; KNL_32-NEXT: kmovw %eax, %k0 -; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_32-NEXT: andl $1, %eax ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kandw %k0, %k1, %k0 -; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_32-NEXT: kshiftrw $14, %k1, %k1 @@ -2654,7 +2654,7 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> ; KNL_32-NEXT: movw $-5, %ax ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kandw %k1, %k0, %k0 -; KNL_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: movb {{[0-9]+}}(%esp), %al ; KNL_32-NEXT: kmovw %eax, %k1 ; KNL_32-NEXT: kshiftlw $15, %k1, %k1 ; KNL_32-NEXT: kshiftrw $13, %k1, %k1 @@ -2697,12 +2697,12 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> ; SKX_32: # %bb.0: ; SKX_32-NEXT: movb $-3, %al ; SKX_32-NEXT: kmovw %eax, %k0 -; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kshiftlb $7, %k1, %k1 ; SKX_32-NEXT: kshiftrb $7, %k1, %k1 ; SKX_32-NEXT: kandw %k0, %k1, %k0 -; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kshiftlb $7, %k1, %k1 ; SKX_32-NEXT: kshiftrb $6, %k1, %k1 @@ -2710,7 +2710,7 @@ define void @test30b(<3 x ptr> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> ; SKX_32-NEXT: movb $-5, %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kandw %k1, %k0, %k0 -; SKX_32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: movb {{[0-9]+}}(%esp), %al ; SKX_32-NEXT: kmovw %eax, %k1 ; SKX_32-NEXT: kshiftlb $7, %k1, %k1 ; SKX_32-NEXT: kshiftrb $5, %k1, %k1 diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll index bd55b98f627c65..a93d64f4f53bb3 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs-x32.ll @@ -176,7 +176,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %edx ; X86-NEXT: xorw (%eax), %dx -; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: movb 2(%ecx), %cl ; X86-NEXT: xorb 2(%eax), %cl ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: orw %dx, %ax @@ -308,7 +308,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %edx ; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: movb 4(%ecx), %cl ; X86-NEXT: xorb 4(%eax), %cl ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: orl %edx, %eax @@ -510,7 +510,7 @@ define i1 @length9_eq(ptr %X, ptr %Y) nounwind { ; X86-NEXT: xorl (%eax), %edx ; X86-NEXT: xorl 4(%eax), %esi ; X86-NEXT: orl %edx, %esi -; X86-NEXT: movzbl 8(%ecx), %ecx +; X86-NEXT: movb 8(%ecx), %cl ; X86-NEXT: xorb 8(%eax), %cl ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: orl %esi, %eax @@ -641,7 +641,7 @@ define i1 @length13_eq(ptr %X, ptr %Y) nounwind { ; X86-NEXT: orl %esi, %eax ; X86-NEXT: movl 8(%edx), %esi ; X86-NEXT: xorl 8(%ecx), %esi -; X86-NEXT: movzbl 12(%edx), %edx +; X86-NEXT: movb 12(%edx), %dl ; X86-NEXT: xorb 12(%ecx), %dl ; X86-NEXT: movzbl %dl, %ecx ; X86-NEXT: orl %esi, %ecx diff --git a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll index ae5d9e1715b31f..28182ccada5e2e 100644 --- a/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll +++ b/llvm/test/CodeGen/X86/memcmp-more-load-pairs.ll @@ -165,7 +165,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: xorw (%rsi), %ax -; X64-NEXT: movzbl 2(%rdi), %ecx +; X64-NEXT: movb 2(%rdi), %cl ; X64-NEXT: xorb 2(%rsi), %cl ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: orw %ax, %cx @@ -281,7 +281,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: movzbl 4(%rdi), %ecx +; X64-NEXT: movb 4(%rdi), %cl ; X64-NEXT: xorb 4(%rsi), %cl ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: orl %eax, %ecx @@ -439,7 +439,7 @@ define i1 @length9_eq(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rax ; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movzbl 8(%rdi), %ecx +; X64-NEXT: movb 8(%rdi), %cl ; X64-NEXT: xorb 8(%rsi), %cl ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: orq %rax, %rcx diff --git a/llvm/test/CodeGen/X86/memcmp-x32.ll b/llvm/test/CodeGen/X86/memcmp-x32.ll index 718db45a8bd64c..132fb581bb0e07 100644 --- a/llvm/test/CodeGen/X86/memcmp-x32.ll +++ b/llvm/test/CodeGen/X86/memcmp-x32.ll @@ -204,7 +204,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %edx ; X86-NEXT: xorw (%eax), %dx -; X86-NEXT: movzbl 2(%ecx), %ecx +; X86-NEXT: movb 2(%ecx), %cl ; X86-NEXT: xorb 2(%eax), %cl ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: orw %dx, %ax @@ -336,7 +336,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %edx ; X86-NEXT: xorl (%eax), %edx -; X86-NEXT: movzbl 4(%ecx), %ecx +; X86-NEXT: movb 4(%ecx), %cl ; X86-NEXT: xorb 4(%eax), %cl ; X86-NEXT: movzbl %cl, %eax ; X86-NEXT: orl %edx, %eax diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 6aac8b8a1f61d2..22d72bf7620b9e 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -191,7 +191,7 @@ define i1 @length3_eq(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movzwl (%rdi), %eax ; X64-NEXT: xorw (%rsi), %ax -; X64-NEXT: movzbl 2(%rdi), %ecx +; X64-NEXT: movb 2(%rdi), %cl ; X64-NEXT: xorb 2(%rsi), %cl ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: orw %ax, %cx @@ -307,7 +307,7 @@ define i1 @length5_eq(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %eax ; X64-NEXT: xorl (%rsi), %eax -; X64-NEXT: movzbl 4(%rdi), %ecx +; X64-NEXT: movb 4(%rdi), %cl ; X64-NEXT: xorb 4(%rsi), %cl ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: orl %eax, %ecx @@ -465,7 +465,7 @@ define i1 @length9_eq(ptr %X, ptr %Y) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rax ; X64-NEXT: xorq (%rsi), %rax -; X64-NEXT: movzbl 8(%rdi), %ecx +; X64-NEXT: movb 8(%rdi), %cl ; X64-NEXT: xorb 8(%rsi), %cl ; X64-NEXT: movzbl %cl, %ecx ; X64-NEXT: orq %rax, %rcx diff --git a/llvm/test/CodeGen/X86/memcpy.ll b/llvm/test/CodeGen/X86/memcpy.ll index 6ec9b20163051b..cdee188062d4e6 100644 --- a/llvm/test/CodeGen/X86/memcpy.ll +++ b/llvm/test/CodeGen/X86/memcpy.ll @@ -467,7 +467,7 @@ define void @PR15348(ptr %a, ptr %b) { ; unaligned loads and stores. ; DARWIN-LABEL: PR15348: ; DARWIN: ## %bb.0: -; DARWIN-NEXT: movzbl 16(%rsi), %eax +; DARWIN-NEXT: movb 16(%rsi), %al ; DARWIN-NEXT: movb %al, 16(%rdi) ; DARWIN-NEXT: movq (%rsi), %rax ; DARWIN-NEXT: movq 8(%rsi), %rcx @@ -477,7 +477,7 @@ define void @PR15348(ptr %a, ptr %b) { ; ; LINUX-LABEL: PR15348: ; LINUX: # %bb.0: -; LINUX-NEXT: movzbl 16(%rsi), %eax +; LINUX-NEXT: movb 16(%rsi), %al ; LINUX-NEXT: movb %al, 16(%rdi) ; LINUX-NEXT: movq (%rsi), %rax ; LINUX-NEXT: movq 8(%rsi), %rcx @@ -487,7 +487,7 @@ define void @PR15348(ptr %a, ptr %b) { ; ; LINUX-SKL-LABEL: PR15348: ; LINUX-SKL: # %bb.0: -; LINUX-SKL-NEXT: movzbl 16(%rsi), %eax +; LINUX-SKL-NEXT: movb 16(%rsi), %al ; LINUX-SKL-NEXT: movb %al, 16(%rdi) ; LINUX-SKL-NEXT: vmovups (%rsi), %xmm0 ; LINUX-SKL-NEXT: vmovups %xmm0, (%rdi) @@ -495,7 +495,7 @@ define void @PR15348(ptr %a, ptr %b) { ; ; LINUX-SKX-LABEL: PR15348: ; LINUX-SKX: # %bb.0: -; LINUX-SKX-NEXT: movzbl 16(%rsi), %eax +; LINUX-SKX-NEXT: movb 16(%rsi), %al ; LINUX-SKX-NEXT: movb %al, 16(%rdi) ; LINUX-SKX-NEXT: vmovups (%rsi), %xmm0 ; LINUX-SKX-NEXT: vmovups %xmm0, (%rdi) @@ -503,7 +503,7 @@ define void @PR15348(ptr %a, ptr %b) { ; ; LINUX-KNL-LABEL: PR15348: ; LINUX-KNL: # %bb.0: -; LINUX-KNL-NEXT: movzbl 16(%rsi), %eax +; LINUX-KNL-NEXT: movb 16(%rsi), %al ; LINUX-KNL-NEXT: movb %al, 16(%rdi) ; LINUX-KNL-NEXT: vmovups (%rsi), %xmm0 ; LINUX-KNL-NEXT: vmovups %xmm0, (%rdi) @@ -511,7 +511,7 @@ define void @PR15348(ptr %a, ptr %b) { ; ; LINUX-AVX512BW-LABEL: PR15348: ; LINUX-AVX512BW: # %bb.0: -; LINUX-AVX512BW-NEXT: movzbl 16(%rsi), %eax +; LINUX-AVX512BW-NEXT: movb 16(%rsi), %al ; LINUX-AVX512BW-NEXT: movb %al, 16(%rdi) ; LINUX-AVX512BW-NEXT: vmovups (%rsi), %xmm0 ; LINUX-AVX512BW-NEXT: vmovups %xmm0, (%rdi) diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll index 1e31ee7ad6b590..955ba1e5d93886 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -893,8 +893,8 @@ define <16 x i8> @merge_16i8_i8_01u3456789ABCDuF(ptr %ptr) nounwind uwtable noin ; X86-SSE1-NEXT: movl 3(%ecx), %esi ; X86-SSE1-NEXT: movl 7(%ecx), %edi ; X86-SSE1-NEXT: movzwl 11(%ecx), %ebx -; X86-SSE1-NEXT: movzbl 13(%ecx), %edx -; X86-SSE1-NEXT: movzbl 15(%ecx), %ecx +; X86-SSE1-NEXT: movb 13(%ecx), %dl +; X86-SSE1-NEXT: movb 15(%ecx), %cl ; X86-SSE1-NEXT: movb %dl, 13(%eax) ; X86-SSE1-NEXT: movb %cl, 15(%eax) ; X86-SSE1-NEXT: movw %bx, 11(%eax) @@ -976,7 +976,7 @@ define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(ptr %ptr) nounwind uwtable noin ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-SSE1-NEXT: movzwl (%ecx), %edx -; X86-SSE1-NEXT: movzbl 3(%ecx), %ecx +; X86-SSE1-NEXT: movb 3(%ecx), %cl ; X86-SSE1-NEXT: movb %cl, 3(%eax) ; X86-SSE1-NEXT: movw %dx, (%eax) ; X86-SSE1-NEXT: movb $0, 15(%eax) diff --git a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll index d48c59a89c3d7e..67b399b36220ad 100644 --- a/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll +++ b/llvm/test/CodeGen/X86/merge-store-partially-alias-loads.ll @@ -7,10 +7,10 @@ ; X86-LABEL: {{^}}merge_store_partial_overlap_load: ; X86-DAG: movzwl ([[BASEREG:%[a-z]+]]), %e[[LO2:[a-z]+]] -; X86-DAG: movzbl 2([[BASEREG]]), %e[[HI1:[a-z]]] +; X86-DAG: movb 2([[BASEREG]]), [[HI1:%[a-z]+]] ; X86-NEXT: movw %[[LO2]], 1([[BASEREG]]) -; X86-NEXT: movb %[[HI1]]l, 3([[BASEREG]]) +; X86-NEXT: movb [[HI1]], 3([[BASEREG]]) ; X86-NEXT: retq ; DBGDAG-LABEL: Optimized legalized selection DAG: %bb.0 'merge_store_partial_overlap_load:' diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll index 40947c27a5eb62..f840966e715467 100644 --- a/llvm/test/CodeGen/X86/midpoint-int.ll +++ b/llvm/test/CodeGen/X86/midpoint-int.ll @@ -967,8 +967,8 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; ; X86-LABEL: scalar_i8_signed_reg_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: cmpb %al, %cl ; X86-NEXT: setle %dl ; X86-NEXT: jg .LBB15_1 @@ -1017,8 +1017,8 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; ; X86-LABEL: scalar_i8_unsigned_reg_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: cmpb %al, %cl ; X86-NEXT: setbe %dl ; X86-NEXT: ja .LBB16_1 @@ -1070,9 +1070,9 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; ; X86-LABEL: scalar_i8_signed_mem_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl (%ecx), %ecx +; X86-NEXT: movb (%ecx), %cl ; X86-NEXT: cmpb %al, %cl ; X86-NEXT: setle %dl ; X86-NEXT: jg .LBB17_1 @@ -1122,9 +1122,9 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind { ; ; X86-LABEL: scalar_i8_signed_reg_mem: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: cmpb %al, %cl ; X86-NEXT: setle %dl ; X86-NEXT: jg .LBB18_1 @@ -1177,8 +1177,8 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl (%ecx), %ecx -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%ecx), %cl +; X86-NEXT: movb (%eax), %al ; X86-NEXT: cmpb %al, %cl ; X86-NEXT: setle %dl ; X86-NEXT: jg .LBB19_1 diff --git a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll index 2e2ba29da8646c..d15243024c1df4 100644 --- a/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll +++ b/llvm/test/CodeGen/X86/misched_phys_reg_assign_order.ll @@ -19,7 +19,7 @@ define void @g() #0 { ; CHECK-NEXT: .cfi_offset %esi, -16 ; CHECK-NEXT: .cfi_offset %ebx, -12 ; CHECK-NEXT: movl f, %esi -; CHECK-NEXT: movzbl (%esi), %eax +; CHECK-NEXT: movb (%esi), %al ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl %edx, %edx diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll index 9ed2d40f3b54d8..8ed51fde6cbdec 100644 --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -4293,7 +4293,7 @@ define i1 @movmsk_v16i8_var(<16 x i8> %x, <16 x i8> %y, i32 %z) { ; SKX-NEXT: vpmovm2b %k0, %xmm0 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $15, %edi -; SKX-NEXT: movzbl -24(%rsp,%rdi), %eax +; SKX-NEXT: movb -24(%rsp,%rdi), %al ; SKX-NEXT: retq %cmp = icmp eq <16 x i8> %x, %y %val = extractelement <16 x i1> %cmp, i32 %z @@ -4329,7 +4329,7 @@ define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) { ; KNL-NEXT: vpmovdw %zmm0, %ymm0 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $7, %edi -; KNL-NEXT: movzbl -24(%rsp,%rdi,2), %eax +; KNL-NEXT: movb -24(%rsp,%rdi,2), %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -4340,7 +4340,7 @@ define i1 @movmsk_v8i16_var(<8 x i16> %x, <8 x i16> %y, i32 %z) { ; SKX-NEXT: vpmovm2w %k0, %xmm0 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $7, %edi -; SKX-NEXT: movzbl -24(%rsp,%rdi,2), %eax +; SKX-NEXT: movb -24(%rsp,%rdi,2), %al ; SKX-NEXT: retq %cmp = icmp sgt <8 x i16> %x, %y %val = extractelement <8 x i1> %cmp, i32 %z @@ -4373,7 +4373,7 @@ define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) { ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $3, %edi -; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax +; KNL-NEXT: movb -24(%rsp,%rdi,4), %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -4384,7 +4384,7 @@ define i1 @movmsk_v4i32_var(<4 x i32> %x, <4 x i32> %y, i32 %z) { ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $3, %edi -; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax +; SKX-NEXT: movb -24(%rsp,%rdi,4), %al ; SKX-NEXT: retq %cmp = icmp slt <4 x i32> %x, %y %val = extractelement <4 x i1> %cmp, i32 %z @@ -4430,7 +4430,7 @@ define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) { ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $1, %edi -; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax +; KNL-NEXT: movb -24(%rsp,%rdi,8), %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -4441,7 +4441,7 @@ define i1 @movmsk_v2i64_var(<2 x i64> %x, <2 x i64> %y, i32 %z) { ; SKX-NEXT: vpmovm2q %k0, %xmm0 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $1, %edi -; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax +; SKX-NEXT: movb -24(%rsp,%rdi,8), %al ; SKX-NEXT: retq %cmp = icmp ne <2 x i64> %x, %y %val = extractelement <2 x i1> %cmp, i32 %z @@ -4477,7 +4477,7 @@ define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) { ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $3, %edi -; KNL-NEXT: movzbl -24(%rsp,%rdi,4), %eax +; KNL-NEXT: movb -24(%rsp,%rdi,4), %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -4488,7 +4488,7 @@ define i1 @movmsk_v4f32_var(<4 x float> %x, <4 x float> %y, i32 %z) { ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $3, %edi -; SKX-NEXT: movzbl -24(%rsp,%rdi,4), %eax +; SKX-NEXT: movb -24(%rsp,%rdi,4), %al ; SKX-NEXT: retq %cmp = fcmp ueq <4 x float> %x, %y %val = extractelement <4 x i1> %cmp, i32 %z @@ -4521,7 +4521,7 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) { ; KNL-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $1, %edi -; KNL-NEXT: movzbl -24(%rsp,%rdi,8), %eax +; KNL-NEXT: movb -24(%rsp,%rdi,8), %al ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; @@ -4532,7 +4532,7 @@ define i1 @movmsk_v2f64_var(<2 x double> %x, <2 x double> %y, i32 %z) { ; SKX-NEXT: vpmovm2q %k0, %xmm0 ; SKX-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) ; SKX-NEXT: andl $1, %edi -; SKX-NEXT: movzbl -24(%rsp,%rdi,8), %eax +; SKX-NEXT: movb -24(%rsp,%rdi,8), %al ; SKX-NEXT: retq %cmp = fcmp oge <2 x double> %x, %y %val = extractelement <2 x i1> %cmp, i32 %z diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll index e722ab33894e2b..48b32a4250b40c 100644 --- a/llvm/test/CodeGen/X86/musttail-varargs.ll +++ b/llvm/test/CodeGen/X86/musttail-varargs.ll @@ -82,7 +82,7 @@ define void @f_thunk(ptr %this, ...) { ; LINUX-NEXT: movq %rbp, %rdx ; LINUX-NEXT: movq %r13, %rcx ; LINUX-NEXT: movq %r12, %r8 -; LINUX-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; LINUX-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; LINUX-NEXT: movq %r15, %r9 ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload @@ -175,7 +175,7 @@ define void @f_thunk(ptr %this, ...) { ; LINUX-X32-NEXT: movq %rbp, %rdx ; LINUX-X32-NEXT: movq %r13, %rcx ; LINUX-X32-NEXT: movq %r12, %r8 -; LINUX-X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; LINUX-X32-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; LINUX-X32-NEXT: movq %r15, %r9 ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll index 961205c50d9763..84f82b7a694c35 100644 --- a/llvm/test/CodeGen/X86/neg-abs.ll +++ b/llvm/test/CodeGen/X86/neg-abs.ll @@ -12,7 +12,7 @@ declare i128 @llvm.abs.i128(i128, i1) define i8 @neg_abs_i8(i8 %x) nounwind { ; X86-LABEL: neg_abs_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarb $7, %al ; X86-NEXT: xorb %al, %cl @@ -154,7 +154,7 @@ define i128 @neg_abs_i128(i128 %x) nounwind { define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind { ; X86-LABEL: sub_abs_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: sarb $7, %al ; X86-NEXT: xorb %al, %cl diff --git a/llvm/test/CodeGen/X86/negate-i1.ll b/llvm/test/CodeGen/X86/negate-i1.ll index 0ed3fa94f5b8fe..2ed6c9535d0ca2 100644 --- a/llvm/test/CodeGen/X86/negate-i1.ll +++ b/llvm/test/CodeGen/X86/negate-i1.ll @@ -13,7 +13,7 @@ define i8 @select_i8_neg1_or_0(i1 %a) { ; ; X32-LABEL: select_i8_neg1_or_0: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: andb $1, %al ; X32-NEXT: negb %al ; X32-NEXT: retl @@ -31,7 +31,7 @@ define i8 @select_i8_neg1_or_0_zeroext(i1 zeroext %a) { ; ; X32-LABEL: select_i8_neg1_or_0_zeroext: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: negb %al ; X32-NEXT: retl %b = sext i1 %a to i8 diff --git a/llvm/test/CodeGen/X86/oddshuffles.ll b/llvm/test/CodeGen/X86/oddshuffles.ll index 0e5539449b9168..8ec463a7672283 100644 --- a/llvm/test/CodeGen/X86/oddshuffles.ll +++ b/llvm/test/CodeGen/X86/oddshuffles.ll @@ -237,7 +237,7 @@ define void @v7i8(<4 x i8> %a, <4 x i8> %b, ptr %p) nounwind { ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: pandn %xmm0, %xmm2 ; SSE2-NEXT: por %xmm1, %xmm2 -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-NEXT: movb %al, 6(%rdi) ; SSE2-NEXT: movd %xmm2, (%rdi) ; SSE2-NEXT: pextrw $2, %xmm2, %eax diff --git a/llvm/test/CodeGen/X86/or-with-overflow.ll b/llvm/test/CodeGen/X86/or-with-overflow.ll index faee83a988c586..495da7e88b7733 100644 --- a/llvm/test/CodeGen/X86/or-with-overflow.ll +++ b/llvm/test/CodeGen/X86/or-with-overflow.ll @@ -9,7 +9,7 @@ define i8 @or_i8_ri(i8 zeroext %0, i8 zeroext %1) { ; X86-LABEL: or_i8_ri: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orb $-17, %cl ; X86-NEXT: je .LBB0_2 @@ -35,8 +35,8 @@ define i8 @or_i8_ri(i8 zeroext %0, i8 zeroext %1) { define i8 @or_i8_rr(i8 zeroext %0, i8 zeroext %1) { ; X86-LABEL: or_i8_rr: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: orb %al, %cl ; X86-NEXT: je .LBB1_2 ; X86-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/X86/packed_struct.ll b/llvm/test/CodeGen/X86/packed_struct.ll index af9f31b7170846..69e3a6e970d374 100644 --- a/llvm/test/CodeGen/X86/packed_struct.ll +++ b/llvm/test/CodeGen/X86/packed_struct.ll @@ -30,7 +30,7 @@ entry: define i8 @bar() nounwind { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl bara+19, %eax +; CHECK-NEXT: movb bara+19, %al ; CHECK-NEXT: addb bara+4, %al ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll index f3d4b6221d085f..a632a17e90a293 100644 --- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -14,7 +14,7 @@ declare i32 @bar(i64) define i1 @plus_one() nounwind { ; CHECK32-LABEL: plus_one: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl M, %eax +; CHECK32-NEXT: movb M, %al ; CHECK32-NEXT: incl L ; CHECK32-NEXT: jne .LBB0_2 ; CHECK32-NEXT: # %bb.1: # %entry @@ -29,7 +29,7 @@ define i1 @plus_one() nounwind { ; ; CHECK64-LABEL: plus_one: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: movzbl M(%rip), %eax +; CHECK64-NEXT: movb M(%rip), %al ; CHECK64-NEXT: incl L(%rip) ; CHECK64-NEXT: jne .LBB0_2 ; CHECK64-NEXT: # %bb.1: # %entry @@ -62,7 +62,7 @@ exit2: define i1 @plus_forty_two() nounwind { ; CHECK32-LABEL: plus_forty_two: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl M, %eax +; CHECK32-NEXT: movb M, %al ; CHECK32-NEXT: addl $42, L ; CHECK32-NEXT: jne .LBB1_2 ; CHECK32-NEXT: # %bb.1: # %entry @@ -77,7 +77,7 @@ define i1 @plus_forty_two() nounwind { ; ; CHECK64-LABEL: plus_forty_two: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: movzbl M(%rip), %eax +; CHECK64-NEXT: movb M(%rip), %al ; CHECK64-NEXT: addl $42, L(%rip) ; CHECK64-NEXT: jne .LBB1_2 ; CHECK64-NEXT: # %bb.1: # %entry @@ -110,7 +110,7 @@ exit2: define i1 @minus_one() nounwind { ; CHECK32-LABEL: minus_one: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl M, %eax +; CHECK32-NEXT: movb M, %al ; CHECK32-NEXT: decl L ; CHECK32-NEXT: jne .LBB2_2 ; CHECK32-NEXT: # %bb.1: # %entry @@ -125,7 +125,7 @@ define i1 @minus_one() nounwind { ; ; CHECK64-LABEL: minus_one: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: movzbl M(%rip), %eax +; CHECK64-NEXT: movb M(%rip), %al ; CHECK64-NEXT: decl L(%rip) ; CHECK64-NEXT: jne .LBB2_2 ; CHECK64-NEXT: # %bb.1: # %entry @@ -158,7 +158,7 @@ exit2: define i1 @minus_forty_two() nounwind { ; CHECK32-LABEL: minus_forty_two: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl M, %eax +; CHECK32-NEXT: movb M, %al ; CHECK32-NEXT: addl $-42, L ; CHECK32-NEXT: jne .LBB3_2 ; CHECK32-NEXT: # %bb.1: # %entry @@ -173,7 +173,7 @@ define i1 @minus_forty_two() nounwind { ; ; CHECK64-LABEL: minus_forty_two: ; CHECK64: # %bb.0: # %entry -; CHECK64-NEXT: movzbl M(%rip), %eax +; CHECK64-NEXT: movb M(%rip), %al ; CHECK64-NEXT: addl $-42, L(%rip) ; CHECK64-NEXT: jne .LBB3_2 ; CHECK64-NEXT: # %bb.1: # %entry diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll index 78012b3d514e17..de1cb22fd402b9 100644 --- a/llvm/test/CodeGen/X86/popcnt.ll +++ b/llvm/test/CodeGen/X86/popcnt.ll @@ -9,7 +9,7 @@ define i8 @cnt8(i8 %x) nounwind readnone { ; X86-LABEL: cnt8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: shrb %al ; X86-NEXT: andb $85, %al diff --git a/llvm/test/CodeGen/X86/pr12360.ll b/llvm/test/CodeGen/X86/pr12360.ll index e0defce3acafa4..d3eae92d73b3c4 100644 --- a/llvm/test/CodeGen/X86/pr12360.ll +++ b/llvm/test/CodeGen/X86/pr12360.ll @@ -4,7 +4,7 @@ define zeroext i1 @f1(ptr %x) { ; CHECK-LABEL: f1: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: movb (%rdi), %al ; CHECK-NEXT: retq entry: @@ -16,7 +16,7 @@ entry: define zeroext i1 @f2(ptr %x) { ; CHECK-LABEL: f2: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: movb (%rdi), %al ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll index 38107284509e06..11cd6828b11b0a 100644 --- a/llvm/test/CodeGen/X86/pr15267.ll +++ b/llvm/test/CodeGen/X86/pr15267.ll @@ -27,7 +27,7 @@ define <4 x i3> @test1(ptr %in) nounwind { define <4 x i1> @test2(ptr %in) nounwind { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: movb (%rdi), %al ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: shrb %cl ; CHECK-NEXT: andb $1, %cl @@ -53,7 +53,7 @@ define <4 x i1> @test2(ptr %in) nounwind { define <4 x i64> @test3(ptr %in) nounwind { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: movb (%rdi), %al ; CHECK-NEXT: movzbl %al, %ecx ; CHECK-NEXT: shrb %al ; CHECK-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/pr20011.ll b/llvm/test/CodeGen/X86/pr20011.ll index 4810226b4a7564..077dc40294e5e9 100644 --- a/llvm/test/CodeGen/X86/pr20011.ll +++ b/llvm/test/CodeGen/X86/pr20011.ll @@ -8,8 +8,8 @@ define void @crash(i64 %x0, i64 %y0, ptr nocapture %dest) nounwind { ; X86-LABEL: crash: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: shlb $2, %dl ; X86-NEXT: andb $3, %cl ; X86-NEXT: orb %dl, %cl diff --git a/llvm/test/CodeGen/X86/pr22473.ll b/llvm/test/CodeGen/X86/pr22473.ll index 78f5ad89096403..25545c48b5cd28 100644 --- a/llvm/test/CodeGen/X86/pr22473.ll +++ b/llvm/test/CodeGen/X86/pr22473.ll @@ -6,7 +6,7 @@ define zeroext i1 @PR22473(ptr, i8) { ; X86-LABEL: PR22473: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: cmpb {{[0-9]+}}(%esp), %al ; X86-NEXT: sete %al ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr28824.ll b/llvm/test/CodeGen/X86/pr28824.ll index 274689527a243c..30c67ff98f4ee7 100644 --- a/llvm/test/CodeGen/X86/pr28824.ll +++ b/llvm/test/CodeGen/X86/pr28824.ll @@ -1,33 +1,14 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s @d = global i32 0, align 4 ; Verify the sar happens before ecx is clobbered with the parameter being ; passed to fn3 - +; CHECK-LABEL: fn4 +; CHECK: movb d, %cl +; CHECK: sarl %cl +; CHECK: movl $2, %ecx define i32 @fn4(i32 %i) #0 { -; CHECK-LABEL: fn4: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movzbl d, %ecx -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: sarl %cl, %esi -; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: movl $2, %ecx -; CHECK-NEXT: movl $5, %edx -; CHECK-NEXT: pushl %eax -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: calll fn3@PLT -; CHECK-NEXT: addl $16, %esp -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testl %esi, %esi -; CHECK-NEXT: setle %al -; CHECK-NEXT: addl $8, %esp -; CHECK-NEXT: popl %esi -; CHECK-NEXT: retl entry: %0 = load i32, ptr @d, align 4 %shr = ashr i32 %i, %0 diff --git a/llvm/test/CodeGen/X86/pr32345.ll b/llvm/test/CodeGen/X86/pr32345.ll index 2745cb8bb908bb..511672a6f2c5d1 100644 --- a/llvm/test/CodeGen/X86/pr32345.ll +++ b/llvm/test/CodeGen/X86/pr32345.ll @@ -72,7 +72,7 @@ define void @foo() { ; ; X64-LABEL: foo: ; X64: # %bb.0: # %bb -; X64-NEXT: movzbl var_27(%rip), %ecx +; X64-NEXT: movb var_27(%rip), %cl ; X64-NEXT: movzwl var_22(%rip), %eax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; X64-NEXT: addb $30, %cl @@ -89,7 +89,7 @@ define void @foo() { ; 686-NEXT: .cfi_def_cfa_register %ebp ; 686-NEXT: andl $-8, %esp ; 686-NEXT: subl $8, %esp -; 686-NEXT: movzbl var_27, %ecx +; 686-NEXT: movb var_27, %cl ; 686-NEXT: movzwl var_22, %eax ; 686-NEXT: movl %eax, (%esp) ; 686-NEXT: movl $0, {{[0-9]+}}(%esp) diff --git a/llvm/test/CodeGen/X86/pr34292.ll b/llvm/test/CodeGen/X86/pr34292.ll index b9cc84c338efbc..effc9a556a7cc8 100644 --- a/llvm/test/CodeGen/X86/pr34292.ll +++ b/llvm/test/CodeGen/X86/pr34292.ll @@ -7,7 +7,7 @@ define void @sum_unroll(ptr nocapture readonly, ptr nocapture) { ; CHECK-LABEL: sum_unroll: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl _ZL1c(%rip), %eax +; CHECK-NEXT: movb _ZL1c(%rip), %al ; CHECK-NEXT: movq (%rdi), %rcx ; CHECK-NEXT: addb $-1, %al ; CHECK-NEXT: adcq %rcx, (%rsi) diff --git a/llvm/test/CodeGen/X86/pr34381.ll b/llvm/test/CodeGen/X86/pr34381.ll index 32d8b74c6667f9..c2b25b43cdf92e 100644 --- a/llvm/test/CodeGen/X86/pr34381.ll +++ b/llvm/test/CodeGen/X86/pr34381.ll @@ -17,7 +17,7 @@ define void @_Z3foov() { ; CHECK-NEXT: cmpl %eax, var_21(%rip) ; CHECK-NEXT: setb %cl ; CHECK-NEXT: movl %ecx, var_390(%rip) -; CHECK-NEXT: movzbl var_11(%rip), %eax +; CHECK-NEXT: movb var_11(%rip), %al ; CHECK-NEXT: movb %al, var_370(%rip) ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/pr35765.ll b/llvm/test/CodeGen/X86/pr35765.ll index 81d1fbe9d642b0..5d15c2db3e6290 100644 --- a/llvm/test/CodeGen/X86/pr35765.ll +++ b/llvm/test/CodeGen/X86/pr35765.ll @@ -9,7 +9,7 @@ define dso_local void @PR35765() { ; CHECK-LABEL: PR35765: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl s1(%rip), %ecx +; CHECK-NEXT: movb s1(%rip), %cl ; CHECK-NEXT: addb $-118, %cl ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: shll %cl, %eax diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll index 8736d8e91e768f..094767d2316b83 100644 --- a/llvm/test/CodeGen/X86/pr38539.ll +++ b/llvm/test/CodeGen/X86/pr38539.ll @@ -6,7 +6,7 @@ define void @f() { ; X64-LABEL: f: ; X64: # %bb.0: # %BB -; X64-NEXT: movzbl (%rax), %eax +; X64-NEXT: movb (%rax), %al ; X64-NEXT: cmpb $0, (%rax) ; X64-NEXT: setne (%rax) ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax @@ -23,7 +23,7 @@ define void @f() { ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $16, %esp -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: cmpb $0, (%eax) ; X86-NEXT: setne (%eax) ; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax @@ -61,7 +61,7 @@ BB: define void @g() { ; X64-LABEL: g: ; X64: # %bb.0: # %BB -; X64-NEXT: movzbl (%rax), %eax +; X64-NEXT: movb (%rax), %al ; X64-NEXT: cmpb $0, (%rax) ; X64-NEXT: setne (%rax) ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax @@ -78,7 +78,7 @@ define void @g() { ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: cmpb $0, (%eax) ; X86-NEXT: setne (%eax) ; X86-NEXT: leal -{{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/pr38743.ll b/llvm/test/CodeGen/X86/pr38743.ll index c05310090660dd..fff34a8ec4f54a 100644 --- a/llvm/test/CodeGen/X86/pr38743.ll +++ b/llvm/test/CodeGen/X86/pr38743.ll @@ -40,11 +40,11 @@ define void @pr38743(i32 %a0) #1 align 2 { ; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, (%rax) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al ; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %esi -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %edi +; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %dil ; CHECK-NEXT: movb %al, (%rax) ; CHECK-NEXT: movq %rcx, 1(%rax) ; CHECK-NEXT: movw %dx, 9(%rax) diff --git a/llvm/test/CodeGen/X86/pr38795.ll b/llvm/test/CodeGen/X86/pr38795.ll index b421cc471fbfcf..37f0cf886416a8 100644 --- a/llvm/test/CodeGen/X86/pr38795.ll +++ b/llvm/test/CodeGen/X86/pr38795.ll @@ -51,7 +51,7 @@ define dso_local void @fn() { ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl a -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: movb %cl, %dh ; CHECK-NEXT: movl $0, h ; CHECK-NEXT: cmpb $8, %dl @@ -76,7 +76,7 @@ define dso_local void @fn() { ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 ; CHECK-NEXT: movl $.str, (%esp) ; CHECK-NEXT: calll printf -; CHECK-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload ; CHECK-NEXT: # implicit-def: $eax ; CHECK-NEXT: .LBB0_6: # %for.cond35 ; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 diff --git a/llvm/test/CodeGen/X86/pr39926.ll b/llvm/test/CodeGen/X86/pr39926.ll index 439ee5784416c9..edaacafd3f4cf9 100644 --- a/llvm/test/CodeGen/X86/pr39926.ll +++ b/llvm/test/CodeGen/X86/pr39926.ll @@ -10,17 +10,17 @@ define i8 @test_offset(ptr %base) { ; CHECK-NEXT: movl $0, 1(%rdi) ; CHECK-NEXT: movl -4(%rdi), %eax ; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: movb (%rdi), %al ; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl 1(%rdi), %eax ; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movzwl 5(%rdi), %eax ; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl 7(%rdi), %eax +; CHECK-NEXT: movb 7(%rdi), %al ; CHECK-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movl 8(%rdi), %eax ; CHECK-NEXT: movl %eax, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr46527.ll b/llvm/test/CodeGen/X86/pr46527.ll index 5ae953ab82ab4b..82701f5d1b4cf4 100644 --- a/llvm/test/CodeGen/X86/pr46527.ll +++ b/llvm/test/CodeGen/X86/pr46527.ll @@ -12,7 +12,7 @@ define void @f(ptr %out, <16 x i8> %in, i1 %flag) { ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp0-.L0$pb), %ecx ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %dl ; CHECK-NEXT: notb %dl ; CHECK-NEXT: andb $1, %dl ; CHECK-NEXT: movzbl %dl, %edx diff --git a/llvm/test/CodeGen/X86/pr5145.ll b/llvm/test/CodeGen/X86/pr5145.ll index da15bd6720ce24..6990e784e9562b 100644 --- a/llvm/test/CodeGen/X86/pr5145.ll +++ b/llvm/test/CodeGen/X86/pr5145.ll @@ -5,7 +5,7 @@ define void @atomic_maxmin_i8() { ; CHECK-LABEL: atomic_maxmin_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl sc8(%rip), %eax +; CHECK-NEXT: movb sc8(%rip), %al ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -17,7 +17,7 @@ define void @atomic_maxmin_i8() { ; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip) ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %atomicrmw.end -; CHECK-NEXT: movzbl sc8(%rip), %eax +; CHECK-NEXT: movb sc8(%rip), %al ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_3: # %atomicrmw.start2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -29,7 +29,7 @@ define void @atomic_maxmin_i8() { ; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip) ; CHECK-NEXT: jne .LBB0_3 ; CHECK-NEXT: # %bb.4: # %atomicrmw.end1 -; CHECK-NEXT: movzbl sc8(%rip), %eax +; CHECK-NEXT: movb sc8(%rip), %al ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_5: # %atomicrmw.start8 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -41,7 +41,7 @@ define void @atomic_maxmin_i8() { ; CHECK-NEXT: lock cmpxchgb %cl, sc8(%rip) ; CHECK-NEXT: jne .LBB0_5 ; CHECK-NEXT: # %bb.6: # %atomicrmw.end7 -; CHECK-NEXT: movzbl sc8(%rip), %eax +; CHECK-NEXT: movb sc8(%rip), %al ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_7: # %atomicrmw.start14 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/X86/reduce-trunc-shl.ll b/llvm/test/CodeGen/X86/reduce-trunc-shl.ll index 54ba5bb1eae498..891d42a26feb9a 100644 --- a/llvm/test/CodeGen/X86/reduce-trunc-shl.ll +++ b/llvm/test/CodeGen/X86/reduce-trunc-shl.ll @@ -157,14 +157,14 @@ define void @trunc_shl_16_i16_i64(ptr %out, ptr %in) { define void @trunc_shl_7_i8_i64(ptr %out, ptr %in) { ; SSE2-LABEL: trunc_shl_7_i8_i64: ; SSE2: # %bb.0: -; SSE2-NEXT: movzbl (%rsi), %eax +; SSE2-NEXT: movb (%rsi), %al ; SSE2-NEXT: shlb $7, %al ; SSE2-NEXT: movb %al, (%rdi) ; SSE2-NEXT: retq ; ; AVX2-LABEL: trunc_shl_7_i8_i64: ; AVX2: # %bb.0: -; AVX2-NEXT: movzbl (%rsi), %eax +; AVX2-NEXT: movb (%rsi), %al ; AVX2-NEXT: shlb $7, %al ; AVX2-NEXT: movb %al, (%rdi) ; AVX2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/rot16.ll b/llvm/test/CodeGen/X86/rot16.ll index c7c2d33d98922b..c101a7d5977e65 100644 --- a/llvm/test/CodeGen/X86/rot16.ll +++ b/llvm/test/CodeGen/X86/rot16.ll @@ -7,7 +7,7 @@ define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind { ; X86-LABEL: foo: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw %cl, %ax ; X86-NEXT: retl @@ -32,7 +32,7 @@ define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: shldw %cl, %dx, %ax ; X86-NEXT: retl @@ -56,7 +56,7 @@ define i16 @bar(i16 %x, i16 %y, i16 %z) nounwind { define i16 @un(i16 %x, i16 %y, i16 %z) nounwind { ; X86-LABEL: un: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorw %cl, %ax ; X86-NEXT: retl @@ -81,7 +81,7 @@ define i16 @bu(i16 %x, i16 %y, i16 %z) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: shrdw %cl, %dx, %ax ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/rot32.ll b/llvm/test/CodeGen/X86/rot32.ll index d8edd31c3c06a9..aa2cabb43d0c55 100644 --- a/llvm/test/CodeGen/X86/rot32.ll +++ b/llvm/test/CodeGen/X86/rot32.ll @@ -9,7 +9,7 @@ define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone { ; CHECK32-LABEL: foo: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK32-NEXT: roll %cl, %eax ; CHECK32-NEXT: retl @@ -32,7 +32,7 @@ entry: define i32 @bar(i32 %x, i32 %y, i32 %z) nounwind readnone { ; CHECK32-LABEL: bar: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK32-NEXT: shldl %cl, %edx, %eax @@ -56,7 +56,7 @@ entry: define i32 @un(i32 %x, i32 %y, i32 %z) nounwind readnone { ; CHECK32-LABEL: un: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK32-NEXT: rorl %cl, %eax ; CHECK32-NEXT: retl @@ -79,7 +79,7 @@ entry: define i32 @bu(i32 %x, i32 %y, i32 %z) nounwind readnone { ; CHECK32-LABEL: bu: ; CHECK32: # %bb.0: # %entry -; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK32-NEXT: shrdl %cl, %edx, %eax diff --git a/llvm/test/CodeGen/X86/rotate.ll b/llvm/test/CodeGen/X86/rotate.ll index ea32edba628228..4e61a21aaac7b3 100644 --- a/llvm/test/CodeGen/X86/rotate.ll +++ b/llvm/test/CodeGen/X86/rotate.ll @@ -8,7 +8,7 @@ define i64 @rotl64(i64 %A, i8 %Amt) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %esi, %eax @@ -62,7 +62,7 @@ define i64 @rotr64(i64 %A, i8 %Amt) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %edx @@ -197,7 +197,7 @@ define i64 @rotr1_64(i64 %A) nounwind { define i32 @rotl32(i32 %A, i8 %Amt) nounwind { ; X86-LABEL: rotl32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: roll %cl, %eax ; X86-NEXT: retl @@ -221,7 +221,7 @@ define i32 @rotl32(i32 %A, i8 %Amt) nounwind { define i32 @rotr32(i32 %A, i8 %Amt) nounwind { ; X86-LABEL: rotr32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorl %cl, %eax ; X86-NEXT: retl @@ -317,7 +317,7 @@ define i32 @rotr1_32(i32 %A) nounwind { define i16 @rotl16(i16 %A, i8 %Amt) nounwind { ; X86-LABEL: rotl16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw %cl, %ax ; X86-NEXT: retl @@ -342,7 +342,7 @@ define i16 @rotl16(i16 %A, i8 %Amt) nounwind { define i16 @rotr16(i16 %A, i8 %Amt) nounwind { ; X86-LABEL: rotr16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorw %cl, %ax ; X86-NEXT: retl @@ -443,8 +443,8 @@ define i16 @rotr1_16(i16 %A) nounwind { define i8 @rotl8(i8 %A, i8 %Amt) nounwind { ; X86-LABEL: rotl8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rolb %cl, %al ; X86-NEXT: retl ; @@ -466,8 +466,8 @@ define i8 @rotl8(i8 %A, i8 %Amt) nounwind { define i8 @rotr8(i8 %A, i8 %Amt) nounwind { ; X86-LABEL: rotr8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rorb %cl, %al ; X86-NEXT: retl ; @@ -489,7 +489,7 @@ define i8 @rotr8(i8 %A, i8 %Amt) nounwind { define i8 @rotli8(i8 %A) nounwind { ; X86-LABEL: rotli8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rolb $5, %al ; X86-NEXT: retl ; @@ -508,7 +508,7 @@ define i8 @rotli8(i8 %A) nounwind { define i8 @rotri8(i8 %A) nounwind { ; X86-LABEL: rotri8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rolb $3, %al ; X86-NEXT: retl ; @@ -527,7 +527,7 @@ define i8 @rotri8(i8 %A) nounwind { define i8 @rotl1_8(i8 %A) nounwind { ; X86-LABEL: rotl1_8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rolb %al ; X86-NEXT: retl ; @@ -546,7 +546,7 @@ define i8 @rotl1_8(i8 %A) nounwind { define i8 @rotr1_8(i8 %A) nounwind { ; X86-LABEL: rotr1_8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rorb %al ; X86-NEXT: retl ; @@ -653,7 +653,7 @@ define i64 @truncated_rot(i64 %x, i32 %amt) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %esi, %eax diff --git a/llvm/test/CodeGen/X86/rotate4.ll b/llvm/test/CodeGen/X86/rotate4.ll index 0cc9f465dd75a8..fff0f1da693d1d 100644 --- a/llvm/test/CodeGen/X86/rotate4.ll +++ b/llvm/test/CodeGen/X86/rotate4.ll @@ -8,7 +8,7 @@ define i32 @rotate_left_32(i32 %a, i32 %b) { ; X86-LABEL: rotate_left_32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: roll %cl, %eax ; X86-NEXT: retl @@ -32,7 +32,7 @@ define i32 @rotate_left_32(i32 %a, i32 %b) { define i32 @rotate_right_32(i32 %a, i32 %b) { ; X86-LABEL: rotate_right_32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorl %cl, %eax ; X86-NEXT: retl @@ -65,7 +65,7 @@ define i64 @rotate_left_64(i64 %a, i64 %b) { ; X86-NEXT: .cfi_offset %esi, -16 ; X86-NEXT: .cfi_offset %edi, -12 ; X86-NEXT: .cfi_offset %ebx, -8 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %esi, %eax @@ -126,7 +126,7 @@ define i64 @rotate_right_64(i64 %a, i64 %b) { ; X86-NEXT: .cfi_offset %esi, -16 ; X86-NEXT: .cfi_offset %edi, -12 ; X86-NEXT: .cfi_offset %ebx, -8 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %edx @@ -180,7 +180,7 @@ define i64 @rotate_right_64(i64 %a, i64 %b) { define void @rotate_left_m32(ptr%pa, i32 %b) { ; X86-LABEL: rotate_left_m32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: roll %cl, (%eax) ; X86-NEXT: retl @@ -205,7 +205,7 @@ define void @rotate_left_m32(ptr%pa, i32 %b) { define void @rotate_right_m32(ptr%pa, i32 %b) { ; X86-LABEL: rotate_right_m32: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorl %cl, (%eax) ; X86-NEXT: retl @@ -242,7 +242,7 @@ define void @rotate_left_m64(ptr%pa, i64 %b) { ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %esi ; X86-NEXT: movl 4(%eax), %ebx @@ -312,7 +312,7 @@ define void @rotate_right_m64(ptr%pa, i64 %b) { ; X86-NEXT: .cfi_offset %edi, -16 ; X86-NEXT: .cfi_offset %ebx, -12 ; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl (%eax), %ebx ; X86-NEXT: movl 4(%eax), %esi @@ -373,8 +373,8 @@ define void @rotate_right_m64(ptr%pa, i64 %b) { define i8 @rotate_left_8(i8 %x, i32 %amount) { ; X86-LABEL: rotate_left_8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rolb %cl, %al ; X86-NEXT: retl ; @@ -399,8 +399,8 @@ define i8 @rotate_left_8(i8 %x, i32 %amount) { define i8 @rotate_right_8(i8 %x, i32 %amount) { ; X86-LABEL: rotate_right_8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: rorb %cl, %al ; X86-NEXT: retl ; @@ -425,7 +425,7 @@ define i8 @rotate_right_8(i8 %x, i32 %amount) { define i16 @rotate_left_16(i16 %x, i32 %amount) { ; X86-LABEL: rotate_left_16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw %cl, %ax ; X86-NEXT: retl @@ -451,7 +451,7 @@ define i16 @rotate_left_16(i16 %x, i32 %amount) { define i16 @rotate_right_16(i16 %x, i32 %amount) { ; X86-LABEL: rotate_right_16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorw %cl, %ax ; X86-NEXT: retl @@ -477,7 +477,7 @@ define i16 @rotate_right_16(i16 %x, i32 %amount) { define void @rotate_left_m8(ptr %p, i32 %amount) { ; X86-LABEL: rotate_left_m8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolb %cl, (%eax) ; X86-NEXT: retl @@ -503,7 +503,7 @@ define void @rotate_left_m8(ptr %p, i32 %amount) { define void @rotate_right_m8(ptr %p, i32 %amount) { ; X86-LABEL: rotate_right_m8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorb %cl, (%eax) ; X86-NEXT: retl @@ -529,7 +529,7 @@ define void @rotate_right_m8(ptr %p, i32 %amount) { define void @rotate_left_m16(ptr %p, i32 %amount) { ; X86-LABEL: rotate_left_m16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw %cl, (%eax) ; X86-NEXT: retl @@ -555,7 +555,7 @@ define void @rotate_left_m16(ptr %p, i32 %amount) { define void @rotate_right_m16(ptr %p, i32 %amount) { ; X86-LABEL: rotate_right_m16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorw %cl, (%eax) ; X86-NEXT: retl @@ -582,7 +582,7 @@ define i32 @rotate_demanded_bits(i32, i32) { ; X86-LABEL: rotate_demanded_bits: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $30, %cl ; X86-NEXT: roll %cl, %eax ; X86-NEXT: retl @@ -608,7 +608,7 @@ define i32 @rotate_demanded_bits_2(i32, i32) { ; X86-LABEL: rotate_demanded_bits_2: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $23, %cl ; X86-NEXT: roll %cl, %eax ; X86-NEXT: retl @@ -633,7 +633,7 @@ define i32 @rotate_demanded_bits_2(i32, i32) { define i32 @rotate_demanded_bits_3(i32, i32) { ; X86-LABEL: rotate_demanded_bits_3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: addb %cl, %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: roll %cl, %eax diff --git a/llvm/test/CodeGen/X86/sadd_sat.ll b/llvm/test/CodeGen/X86/sadd_sat.ll index 5b9a42d1f0d91f..30ccd08b52cf01 100644 --- a/llvm/test/CodeGen/X86/sadd_sat.ll +++ b/llvm/test/CodeGen/X86/sadd_sat.ll @@ -101,8 +101,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %eax, %edx ; X86-NEXT: addb %cl, %dl ; X86-NEXT: sarb $7, %dl @@ -134,7 +134,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %ecx ; X86-NEXT: cmpb $7, %al diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll index deabeb27cdab83..abb928421afa4f 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll @@ -107,9 +107,9 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %eax, %edx ; X86-NEXT: addb %cl, %dl ; X86-NEXT: sarb $7, %dl @@ -145,7 +145,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) ; X86-NEXT: shlb $4, %al ; X86-NEXT: sarb $4, %al diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll index 11d86dd72c5613..f9af360bf30bbd 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -429,8 +429,8 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; SSE-LABEL: v1i8: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax -; SSE-NEXT: movzbl (%rsi), %ecx +; SSE-NEXT: movb (%rdi), %al +; SSE-NEXT: movb (%rsi), %cl ; SSE-NEXT: leal (%rax,%rcx), %esi ; SSE-NEXT: sarb $7, %sil ; SSE-NEXT: addb $-128, %sil @@ -443,8 +443,8 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; ; AVX-LABEL: v1i8: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %eax -; AVX-NEXT: movzbl (%rsi), %ecx +; AVX-NEXT: movb (%rdi), %al +; AVX-NEXT: movb (%rsi), %cl ; AVX-NEXT: leal (%rax,%rcx), %esi ; AVX-NEXT: sarb $7, %sil ; AVX-NEXT: addb $-128, %sil diff --git a/llvm/test/CodeGen/X86/sdiv_fix.ll b/llvm/test/CodeGen/X86/sdiv_fix.ll index 5b4d1801409577..37d47ebc228ba1 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix.ll @@ -224,10 +224,10 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $4, %cl ; X86-NEXT: sarb $4, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: shlb $4, %dl ; X86-NEXT: sarb $4, %dl ; X86-NEXT: shlb $2, %dl diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll index 9e890e66625097..fa89da14508c6d 100644 --- a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll @@ -264,10 +264,10 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $4, %cl ; X86-NEXT: sarb $4, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: shlb $4, %dl ; X86-NEXT: sarb $4, %dl ; X86-NEXT: shlb $2, %dl diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index bad7ef45dda589..2adcbd065cf7c3 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -207,7 +207,7 @@ define signext i8 @test4(ptr nocapture %P, double %F) nounwind readonly { ; MCU-NEXT: # kill: def $ah killed $ah killed $ax ; MCU-NEXT: sahf ; MCU-NEXT: seta %dl -; MCU-NEXT: movzbl (%ecx,%edx,4), %eax +; MCU-NEXT: movb (%ecx,%edx,4), %al ; MCU-NEXT: retl entry: %0 = fcmp olt double %F, 4.200000e+01 @@ -1235,7 +1235,7 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind { ; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %eax ; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %ecx ; ATHLON-NEXT: cmovll %eax, %ecx -; ATHLON-NEXT: movzbl (%ecx), %eax +; ATHLON-NEXT: movb (%ecx), %al ; ATHLON-NEXT: retl ; ; MCU-LABEL: test18: @@ -1276,7 +1276,7 @@ define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) { ; ATHLON-LABEL: trunc_select_miscompile: ; ATHLON: ## %bb.0: ; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax -; ATHLON-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; ATHLON-NEXT: movb {{[0-9]+}}(%esp), %cl ; ATHLON-NEXT: orb $2, %cl ; ATHLON-NEXT: shll %cl, %eax ; ATHLON-NEXT: retl @@ -1773,7 +1773,7 @@ define i8 @select_uaddo_common_op0(i8 %a, i8 %b, i8 %c, i1 %cond) { ; ; ATHLON-LABEL: select_uaddo_common_op0: ; ATHLON: ## %bb.0: -; ATHLON-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; ATHLON-NEXT: movb {{[0-9]+}}(%esp), %al ; ATHLON-NEXT: testb $1, {{[0-9]+}}(%esp) ; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %ecx ; ATHLON-NEXT: leal {{[0-9]+}}(%esp), %edx diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll index d225d420987b30..39c54af306d661 100644 --- a/llvm/test/CodeGen/X86/setcc-combine.ll +++ b/llvm/test/CodeGen/X86/setcc-combine.ll @@ -243,7 +243,7 @@ define i32 @test_gt_2(<4 x i32> %A, <4 x i32> %B) { define void @test_i1_uge(ptr%A2) { ; CHECK-LABEL: test_i1_uge: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl (%rdi), %eax +; CHECK-NEXT: movb (%rdi), %al ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: xorb $1, %cl ; CHECK-NEXT: andb %cl, %al diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll index 62380bcec1cab3..ec23cc9155ac4e 100644 --- a/llvm/test/CodeGen/X86/setcc.ll +++ b/llvm/test/CodeGen/X86/setcc.ll @@ -280,7 +280,7 @@ define i32 @t12(i32 %0, i32 %1) { define i16 @shift_and(i16 %a) { ; X86-LABEL: shift_and: ; X86: ## %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $4, %al ; X86-NEXT: shrb $2, %al ; X86-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/sext-trunc.ll b/llvm/test/CodeGen/X86/sext-trunc.ll index c293dcd8efe9c9..5c59bc00860e36 100644 --- a/llvm/test/CodeGen/X86/sext-trunc.ll +++ b/llvm/test/CodeGen/X86/sext-trunc.ll @@ -1,12 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s define signext i8 @foo(i16 signext %x) nounwind { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: retl %retval56 = trunc i16 %x to i8 ret i8 %retval56 +; CHECK-LABEL: foo: +; CHECK: movb +; CHECK-NEXT: retl } diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll index c89db15d12f45d..1b0f6f0c09c797 100644 --- a/llvm/test/CodeGen/X86/shift-amount-mod.ll +++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll @@ -1322,7 +1322,7 @@ define i64 @reg64_lshr_by_masked_negated_unfolded(i64 %val, i64 %shamt) nounwind ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; X32-NEXT: subb %dl, %cl ; X32-NEXT: movl %esi, %edx ; X32-NEXT: shrl %cl, %edx diff --git a/llvm/test/CodeGen/X86/shift-and.ll b/llvm/test/CodeGen/X86/shift-and.ll index 3cb680396b6ba3..ed010275f0820d 100644 --- a/llvm/test/CodeGen/X86/shift-and.ll +++ b/llvm/test/CodeGen/X86/shift-and.ll @@ -5,7 +5,7 @@ define i32 @t1(i32 %t, i32 %val) nounwind { ; X32-LABEL: t1: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: shll %cl, %eax ; X32-NEXT: retl @@ -25,7 +25,7 @@ define i32 @t1(i32 %t, i32 %val) nounwind { define i32 @t2(i32 %t, i32 %val) nounwind { ; X32-LABEL: t2: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: shll %cl, %eax ; X32-NEXT: retl @@ -47,7 +47,7 @@ define i32 @t2(i32 %t, i32 %val) nounwind { define void @t3(i16 %t) nounwind { ; X32-LABEL: t3: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: sarw %cl, X ; X32-NEXT: retl ; @@ -68,7 +68,7 @@ define i64 @t4(i64 %t, i64 %val) nounwind { ; X32-LABEL: t4: ; X32: # %bb.0: ; X32-NEXT: pushl %esi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, %edx @@ -99,7 +99,7 @@ define i64 @t5(i64 %t, i64 %val) nounwind { ; X32-LABEL: t5: ; X32: # %bb.0: ; X32-NEXT: pushl %esi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, %edx @@ -131,7 +131,7 @@ define void @t5ptr(i64 %t, ptr %ptr) nounwind { ; X32: # %bb.0: ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl (%eax), %edx ; X32-NEXT: movl 4(%eax), %edi diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll index db00e1c49dca7b..d95b9277ddb5ba 100644 --- a/llvm/test/CodeGen/X86/shift-bmi2.ll +++ b/llvm/test/CodeGen/X86/shift-bmi2.ll @@ -5,7 +5,7 @@ define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: shl32: ; BMI2: # %bb.0: -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; @@ -37,7 +37,7 @@ define i32 @shl32p(ptr %p, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: shl32p: ; BMI2: # %bb.0: ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: shlxl %ecx, (%eax), %eax ; BMI2-NEXT: retl ; @@ -74,7 +74,7 @@ define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone { ; BMI2-NEXT: pushl %esi ; BMI2-NEXT: .cfi_def_cfa_offset 8 ; BMI2-NEXT: .cfi_offset %esi, -8 -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: shldl %cl, %eax, %edx @@ -119,7 +119,7 @@ define i64 @shl64p(ptr %p, i64 %shamt) nounwind uwtable readnone { ; BMI2-NEXT: pushl %esi ; BMI2-NEXT: .cfi_def_cfa_offset 8 ; BMI2-NEXT: .cfi_offset %esi, -8 -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: movl (%eax), %esi ; BMI2-NEXT: movl 4(%eax), %edx @@ -165,7 +165,7 @@ define i64 @shl64pi(ptr %p) nounwind uwtable readnone { define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: lshr32: ; BMI2: # %bb.0: -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; @@ -181,7 +181,7 @@ define i32 @lshr32p(ptr %p, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: lshr32p: ; BMI2: # %bb.0: ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: shrxl %ecx, (%eax), %eax ; BMI2-NEXT: retl ; @@ -200,7 +200,7 @@ define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone { ; BMI2-NEXT: pushl %esi ; BMI2-NEXT: .cfi_def_cfa_offset 8 ; BMI2-NEXT: .cfi_offset %esi, -8 -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: shrdl %cl, %edx, %eax @@ -227,7 +227,7 @@ define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone { ; BMI2-NEXT: pushl %esi ; BMI2-NEXT: .cfi_def_cfa_offset 8 ; BMI2-NEXT: .cfi_offset %esi, -8 -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: movl (%edx), %eax ; BMI2-NEXT: movl 4(%edx), %edx @@ -253,7 +253,7 @@ define i64 @lshr64p(ptr %p, i64 %shamt) nounwind uwtable readnone { define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: ashr32: ; BMI2: # %bb.0: -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; BMI2-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; @@ -269,7 +269,7 @@ define i32 @ashr32p(ptr %p, i32 %shamt) nounwind uwtable readnone { ; BMI2-LABEL: ashr32p: ; BMI2: # %bb.0: ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: sarxl %ecx, (%eax), %eax ; BMI2-NEXT: retl ; @@ -288,7 +288,7 @@ define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone { ; BMI2-NEXT: pushl %esi ; BMI2-NEXT: .cfi_def_cfa_offset 8 ; BMI2-NEXT: .cfi_offset %esi, -8 -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: shrdl %cl, %edx, %eax @@ -315,7 +315,7 @@ define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone { ; BMI2-NEXT: pushl %esi ; BMI2-NEXT: .cfi_def_cfa_offset 8 ; BMI2-NEXT: .cfi_offset %esi, -8 -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: movl (%edx), %eax ; BMI2-NEXT: movl 4(%edx), %edx @@ -341,7 +341,7 @@ define i64 @ashr64p(ptr %p, i64 %shamt) nounwind uwtable readnone { define i32 @shl32and(i32 %t, i32 %val) nounwind { ; BMI2-LABEL: shl32and: ; BMI2: # %bb.0: -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; @@ -358,7 +358,7 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind { ; BMI2-LABEL: shl64and: ; BMI2: # %bb.0: ; BMI2-NEXT: pushl %esi -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: shldl %cl, %eax, %edx @@ -382,7 +382,7 @@ define i64 @shl64and(i64 %t, i64 %val) nounwind { define i32 @lshr32and(i32 %t, i32 %val) nounwind { ; BMI2-LABEL: lshr32and: ; BMI2: # %bb.0: -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; @@ -399,7 +399,7 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind { ; BMI2-LABEL: lshr64and: ; BMI2: # %bb.0: ; BMI2-NEXT: pushl %esi -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: shrdl %cl, %edx, %eax @@ -423,7 +423,7 @@ define i64 @lshr64and(i64 %t, i64 %val) nounwind { define i32 @ashr32and(i32 %t, i32 %val) nounwind { ; BMI2-LABEL: ashr32and: ; BMI2: # %bb.0: -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %al ; BMI2-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: retl ; @@ -440,7 +440,7 @@ define i64 @ashr64and(i64 %t, i64 %val) nounwind { ; BMI2-LABEL: ashr64and: ; BMI2: # %bb.0: ; BMI2-NEXT: pushl %esi -; BMI2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; BMI2-NEXT: shrdl %cl, %edx, %eax diff --git a/llvm/test/CodeGen/X86/shift-by-signext.ll b/llvm/test/CodeGen/X86/shift-by-signext.ll index 97a4318c0720f3..6ecf8cd9c882a7 100644 --- a/llvm/test/CodeGen/X86/shift-by-signext.ll +++ b/llvm/test/CodeGen/X86/shift-by-signext.ll @@ -8,7 +8,7 @@ define i32 @t0_shl(i32 %x, i8 %shamt) nounwind { ; X86-LABEL: t0_shl: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; @@ -23,7 +23,7 @@ define i32 @t0_shl(i32 %x, i8 %shamt) nounwind { define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind { ; X86-LABEL: t1_lshr: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; @@ -38,7 +38,7 @@ define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind { define i32 @t2_ashr(i32 %x, i8 %shamt) nounwind { ; X86-LABEL: t2_ashr: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: sarxl %eax, {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl ; @@ -90,7 +90,7 @@ define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shldl %cl, %edx, %eax ; X86-NEXT: retl ; @@ -110,7 +110,7 @@ define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shrdl %cl, %edx, %eax ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/shift-coalesce.ll b/llvm/test/CodeGen/X86/shift-coalesce.ll index e01f56d643df04..3c71feb6483f41 100644 --- a/llvm/test/CodeGen/X86/shift-coalesce.ll +++ b/llvm/test/CodeGen/X86/shift-coalesce.ll @@ -12,7 +12,7 @@ define i64 @foo(i64 %x, ptr %X) { ; CHECK-NEXT: mov esi, dword ptr [esp + 8] ; CHECK-NEXT: mov edx, dword ptr [esp + 12] ; CHECK-NEXT: mov eax, dword ptr [esp + 16] -; CHECK-NEXT: movzx ecx, byte ptr [eax] +; CHECK-NEXT: mov cl, byte ptr [eax] ; CHECK-NEXT: mov eax, esi ; CHECK-NEXT: shl eax, cl ; CHECK-NEXT: shld edx, esi, cl diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll index 8f51cfd26240e0..075fbb29f4a954 100644 --- a/llvm/test/CodeGen/X86/shift-combine.ll +++ b/llvm/test/CodeGen/X86/shift-combine.ll @@ -180,7 +180,7 @@ define i64 @ashr_add_shl_i32(i64 %r) nounwind { define i64 @ashr_add_shl_i8(i64 %r) nounwind { ; X32-LABEL: ashr_add_shl_i8: ; X32: # %bb.0: -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movb {{[0-9]+}}(%esp), %al ; X32-NEXT: addb $2, %al ; X32-NEXT: movsbl %al, %eax ; X32-NEXT: movl %eax, %edx @@ -204,8 +204,8 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) nounwind { ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X32-NEXT: movb {{[0-9]+}}(%esp), %cl +; X32-NEXT: movb {{[0-9]+}}(%esp), %dl ; X32-NEXT: movb {{[0-9]+}}(%esp), %ch ; X32-NEXT: movb {{[0-9]+}}(%esp), %dh ; X32-NEXT: incb %dh diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll index 5a2028216033c7..1213a80921d27d 100644 --- a/llvm/test/CodeGen/X86/shift-double.ll +++ b/llvm/test/CodeGen/X86/shift-double.ll @@ -8,7 +8,7 @@ define i64 @test1(i64 %X, i8 %C) nounwind { ; X86-LABEL: test1: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %esi, %eax @@ -39,7 +39,7 @@ define i64 @test2(i64 %X, i8 %C) nounwind { ; X86-LABEL: test2: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %edx @@ -71,7 +71,7 @@ define i64 @test3(i64 %X, i8 %C) nounwind { ; X86-LABEL: test3: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl %esi, %edx @@ -103,7 +103,7 @@ define i64 @test3(i64 %X, i8 %C) nounwind { define i32 @test4(i32 %A, i32 %B, i8 %C) nounwind { ; X86-LABEL: test4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shldl %cl, %edx, %eax @@ -130,7 +130,7 @@ define i16 @test5(i16 %A, i16 %B, i8 %C) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: shldw %cl, %dx, %ax ; X86-NEXT: retl @@ -158,7 +158,7 @@ define i16 @test5(i16 %A, i16 %B, i8 %C) nounwind { define i32 @test6(i32 %A, i32 %B, i8 %C) nounwind { ; X86-LABEL: test6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrdl %cl, %edx, %eax @@ -185,7 +185,7 @@ define i16 @test7(i16 %A, i16 %B, i8 %C) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: shrdw %cl, %dx, %ax ; X86-NEXT: retl @@ -214,7 +214,7 @@ define i64 @test8(i64 %val, i32 %bits) nounwind { ; X86-LABEL: test8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %esi, %eax @@ -240,7 +240,7 @@ define i64 @test8(i64 %val, i32 %bits) nounwind { define i64 @test9(i64 %val, i32 %bits) nounwind { ; X86-LABEL: test9: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: shrdl %cl, %edx, %eax @@ -264,7 +264,7 @@ define i64 @test9(i64 %val, i32 %bits) nounwind { define i64 @test10(i64 %val, i32 %bits) nounwind { ; X86-LABEL: test10: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: shrdl %cl, %edx, %eax @@ -290,7 +290,7 @@ define i64 @test10(i64 %val, i32 %bits) nounwind { define i32 @test11(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test11: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shldl %cl, %edx, %eax @@ -314,7 +314,7 @@ define i32 @test11(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test12(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test12: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrdl %cl, %edx, %eax @@ -338,7 +338,7 @@ define i32 @test12(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test13(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test13: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shldl %cl, %edx, %eax @@ -361,7 +361,7 @@ define i32 @test13(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test14(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test14: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrdl %cl, %edx, %eax @@ -384,7 +384,7 @@ define i32 @test14(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test15: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shldl %cl, %edx, %eax @@ -408,7 +408,7 @@ define i32 @test15(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test16: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrdl %cl, %edx, %eax @@ -432,7 +432,7 @@ define i32 @test16(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test17(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test17: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrdl %cl, %edx, %eax @@ -456,7 +456,7 @@ define i32 @test17(i32 %hi, i32 %lo, i32 %bits) nounwind { define i32 @test18(i32 %hi, i32 %lo, i32 %bits) nounwind { ; X86-LABEL: test18: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shldl %cl, %edx, %eax @@ -488,7 +488,7 @@ define i32 @not_shld_i32(i32, i32, i32) { ; X86-LABEL: not_shld_i32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: negb %cl @@ -519,7 +519,7 @@ define i32 @not_shrd_i32(i32, i32, i32) { ; X86-LABEL: not_shrd_i32: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: shrl %cl, %edx ; X86-NEXT: negb %cl diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll index d85f4f520bd252..ae337fcd6a02c1 100644 --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -15,7 +15,7 @@ define void @test_lshr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind { ; i686-NEXT: pushl %esi ; i686-NEXT: subl $20, %esp ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movb {{[0-9]+}}(%esp), %al ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl %ebp, %esi @@ -152,7 +152,7 @@ define void @test_ashr_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind { ; i686-NEXT: pushl %esi ; i686-NEXT: subl $24, %esp ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movb {{[0-9]+}}(%esp), %al ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl %ebp, %esi @@ -295,7 +295,7 @@ define void @test_shl_i128(i128 %x, i128 %a, ptr nocapture %r) nounwind { ; i686-NEXT: subl $20, %esp ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movb {{[0-9]+}}(%esp), %al ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx ; i686-NEXT: movl %eax, %ecx ; i686-NEXT: shll %cl, %ebx @@ -722,7 +722,7 @@ define void @test_lshr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no ; x86_64: # %bb.0: # %entry ; x86_64-NEXT: movq %rcx, %rax ; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; x86_64-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d +; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b ; x86_64-NEXT: movl %r9d, %ecx ; x86_64-NEXT: shrdq %cl, %rax, %rdx ; x86_64-NEXT: movl %r8d, %ecx @@ -1016,7 +1016,7 @@ define void @test_ashr_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) no ; x86_64: # %bb.0: # %entry ; x86_64-NEXT: movq %rcx, %r11 ; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; x86_64-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d +; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b ; x86_64-NEXT: movl %r9d, %ecx ; x86_64-NEXT: shrdq %cl, %r11, %rdx ; x86_64-NEXT: movl %r8d, %ecx @@ -1230,7 +1230,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou ; i686-NEXT: .LBB8_29: # %entry ; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: jne .LBB8_30 ; i686-NEXT: # %bb.31: # %entry @@ -1242,7 +1242,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou ; i686-NEXT: .LBB8_34: # %entry ; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB8_35: # %entry -; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx ; i686-NEXT: shrdl %cl, %ebx, %esi ; i686-NEXT: testb $32, %cl @@ -1327,7 +1327,7 @@ define void @test_shl_v2i128(<2 x i128> %x, <2 x i128> %a, ptr nocapture %r) nou ; x86_64: # %bb.0: # %entry ; x86_64-NEXT: movq %rcx, %rax ; x86_64-NEXT: movq {{[0-9]+}}(%rsp), %r10 -; x86_64-NEXT: movzbl {{[0-9]+}}(%rsp), %r9d +; x86_64-NEXT: movb {{[0-9]+}}(%rsp), %r9b ; x86_64-NEXT: movl %r9d, %ecx ; x86_64-NEXT: shldq %cl, %rdx, %rax ; x86_64-NEXT: movl %r8d, %ecx diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll index fe52c7bece86dd..84d59a33acd8bc 100644 --- a/llvm/test/CodeGen/X86/shift-mask.ll +++ b/llvm/test/CodeGen/X86/shift-mask.ll @@ -20,7 +20,7 @@ define i8 @test_i8_shl_lshr_0(i8 %a0) { ; X86-LABEL: test_i8_shl_lshr_0: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $-8, %al ; X86-NEXT: retl ; @@ -38,7 +38,7 @@ define i8 @test_i8_shl_lshr_0(i8 %a0) { define i8 @test_i8_shl_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_shl_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: andb $-32, %al ; X86-NEXT: retl @@ -66,7 +66,7 @@ define i8 @test_i8_shl_lshr_1(i8 %a0) { define i8 @test_i8_shl_lshr_2(i8 %a0) { ; X86-LABEL: test_i8_shl_lshr_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb $2, %al ; X86-NEXT: andb $56, %al ; X86-NEXT: retl @@ -318,7 +318,7 @@ define i64 @test_i64_shl_lshr_2(i64 %a0) { define i8 @test_i8_lshr_lshr_0(i8 %a0) { ; X86-LABEL: test_i8_lshr_lshr_0: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $31, %al ; X86-NEXT: retl ; @@ -336,7 +336,7 @@ define i8 @test_i8_lshr_lshr_0(i8 %a0) { define i8 @test_i8_lshr_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_lshr_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shrb $2, %al ; X86-NEXT: andb $7, %al ; X86-NEXT: retl @@ -364,7 +364,7 @@ define i8 @test_i8_lshr_lshr_1(i8 %a0) { define i8 @test_i8_lshr_lshr_2(i8 %a0) { ; X86-LABEL: test_i8_lshr_lshr_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: andb $28, %al ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/smul_fix.ll b/llvm/test/CodeGen/X86/smul_fix.ll index 8c2b945d6a8ce1..ccfc53c02e9a3f 100644 --- a/llvm/test/CodeGen/X86/smul_fix.ll +++ b/llvm/test/CodeGen/X86/smul_fix.ll @@ -114,10 +114,10 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: sarb $4, %al -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $4, %cl ; X86-NEXT: sarb $4, %cl ; X86-NEXT: movsbl %cl, %ecx @@ -255,10 +255,10 @@ define i4 @func6(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: sarb $4, %al -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $4, %cl ; X86-NEXT: sarb $4, %cl ; X86-NEXT: mulb %cl diff --git a/llvm/test/CodeGen/X86/smul_fix_sat.ll b/llvm/test/CodeGen/X86/smul_fix_sat.ll index 996601ed3be644..0463886fe2285f 100644 --- a/llvm/test/CodeGen/X86/smul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/smul_fix_sat.ll @@ -164,10 +164,10 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: sarb $4, %al -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $4, %cl ; X86-NEXT: movsbl %cl, %ecx ; X86-NEXT: movsbl %al, %eax @@ -475,10 +475,10 @@ define i4 @func6(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shlb $4, %cl ; X86-NEXT: sarb $4, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: movb %al, %ah diff --git a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll index 412128ba7f5107..42623573a838cf 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-illegal-types.ll @@ -33,7 +33,7 @@ define i1 @test_srem_odd(i29 %X) nounwind { define i1 @test_srem_even(i4 %X) nounwind { ; X86-LABEL: test_srem_even: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shlb $4, %cl ; X86-NEXT: sarb $4, %cl @@ -79,7 +79,7 @@ define i1 @test_srem_even(i4 %X) nounwind { define i1 @test_srem_pow2_setne(i6 %X) nounwind { ; X86-LABEL: test_srem_pow2_setne: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shlb $2, %cl ; X86-NEXT: sarb $5, %cl @@ -218,9 +218,9 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_srem_vec: diff --git a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll index 806b209fe66c31..9fca269ead97f4 100644 --- a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll +++ b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -1323,7 +1323,7 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, ; ; X86-AVX512-LABEL: add_ss_mask: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-AVX512-NEXT: kmovw %eax, %k1 ; X86-AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm2 {%k1} ; X86-AVX512-NEXT: vmovaps %xmm2, %xmm0 @@ -1417,7 +1417,7 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> ; ; X86-AVX512-LABEL: add_sd_mask: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-AVX512-NEXT: kmovw %eax, %k1 ; X86-AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm2 {%k1} ; X86-AVX512-NEXT: vmovapd %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index 348fe27616479a..8a129715b46919 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -4379,7 +4379,7 @@ define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind { ; ; X86-AVX512-LABEL: test_mm_set1_epi8: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] ; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0] ; X86-AVX512-NEXT: retl # encoding: [0xc3] ; diff --git a/llvm/test/CodeGen/X86/sshl_sat.ll b/llvm/test/CodeGen/X86/sshl_sat.ll index 2b87e17a0b5e32..ce89c091c77a38 100644 --- a/llvm/test/CodeGen/X86/sshl_sat.ll +++ b/llvm/test/CodeGen/X86/sshl_sat.ll @@ -33,7 +33,7 @@ define i16 @func(i16 %x, i16 %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %edx, %esi ; X86-NEXT: shll %cl, %esi ; X86-NEXT: movswl %si, %edi @@ -77,7 +77,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind { ; X86-LABEL: func2: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl %eax, %eax ; X86-NEXT: movl %eax, %edx @@ -179,9 +179,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: shlb $4, %dl ; X86-NEXT: movb %dl, %ch ; X86-NEXT: shlb %cl, %ch @@ -225,7 +225,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %edx, %ebx @@ -288,7 +288,7 @@ define i18 @func6(i16 %x, i16 %y) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movswl {{[0-9]+}}(%esp), %edx ; X86-NEXT: shll $14, %edx ; X86-NEXT: movl %edx, %esi @@ -332,7 +332,7 @@ define i32 @func7(i32 %x, i32 %y) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: shll %cl, %esi @@ -373,8 +373,8 @@ define i8 @func8(i8 %x, i8 %y) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: movb %dl, %ch ; X86-NEXT: shlb %cl, %ch ; X86-NEXT: movzbl %ch, %esi diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll index 40f90250975860..a0d175fe9de93e 100644 --- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll @@ -108,7 +108,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: cmpl %eax, %edi ; X86-NEXT: cmovel %ebp, %edx ; X86-NEXT: movl %esi, %edi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shll %cl, %edi ; X86-NEXT: movl %edi, %ebp ; X86-NEXT: sarl %cl, %ebp @@ -120,7 +120,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: cmovel %edi, %eax ; X86-NEXT: movl %esi, %edi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shll %cl, %edi ; X86-NEXT: movl %edi, %ebp ; X86-NEXT: sarl %cl, %ebp diff --git a/llvm/test/CodeGen/X86/ssub_sat.ll b/llvm/test/CodeGen/X86/ssub_sat.ll index 8ecc8b39ac4683..be8926b5d948aa 100644 --- a/llvm/test/CodeGen/X86/ssub_sat.ll +++ b/llvm/test/CodeGen/X86/ssub_sat.ll @@ -99,8 +99,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y) nounwind { define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: cmpb %dl, %al ; X86-NEXT: setns %cl @@ -129,7 +129,7 @@ define signext i8 @func8(i8 signext %x, i8 signext %y) nounwind { define signext i4 @func3(i4 signext %x, i4 signext %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %ecx ; X86-NEXT: cmpb $7, %al diff --git a/llvm/test/CodeGen/X86/ssub_sat_plus.ll b/llvm/test/CodeGen/X86/ssub_sat_plus.ll index 5baf7a1dac74c8..45e01e5610afdc 100644 --- a/llvm/test/CodeGen/X86/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_plus.ll @@ -105,8 +105,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: cmpb %al, %dl @@ -140,8 +140,8 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { define signext i4 @func4(i4 signext %x, i4 signext %y, i4 signext %z) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) ; X86-NEXT: shlb $4, %al ; X86-NEXT: sarb $4, %al diff --git a/llvm/test/CodeGen/X86/ssub_sat_vec.ll b/llvm/test/CodeGen/X86/ssub_sat_vec.ll index 42f346c0fd558b..c00f9d0c9dd12e 100644 --- a/llvm/test/CodeGen/X86/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ssub_sat_vec.ll @@ -429,8 +429,8 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; SSE-LABEL: v1i8: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax -; SSE-NEXT: movzbl (%rsi), %ecx +; SSE-NEXT: movb (%rdi), %al +; SSE-NEXT: movb (%rsi), %cl ; SSE-NEXT: xorl %esi, %esi ; SSE-NEXT: cmpb %cl, %al ; SSE-NEXT: setns %sil @@ -443,8 +443,8 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; ; AVX-LABEL: v1i8: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %eax -; AVX-NEXT: movzbl (%rsi), %ecx +; AVX-NEXT: movb (%rdi), %al +; AVX-NEXT: movb (%rsi), %cl ; AVX-NEXT: xorl %esi, %esi ; AVX-NEXT: cmpb %cl, %al ; AVX-NEXT: setns %sil diff --git a/llvm/test/CodeGen/X86/store-narrow.ll b/llvm/test/CodeGen/X86/store-narrow.ll index 97e31b3fa422bd..2632b56f0b325f 100644 --- a/llvm/test/CodeGen/X86/store-narrow.ll +++ b/llvm/test/CodeGen/X86/store-narrow.ll @@ -13,19 +13,12 @@ define void @test1(ptr nocapture %a0, i8 zeroext %a1) nounwind ssp { ; X64-NEXT: movb %sil, (%rdi) ; X64-NEXT: retq ; -; X86-BWON-LABEL: test1: -; X86-BWON: ## %bb.0: ## %entry -; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BWON-NEXT: movb %al, (%ecx) -; X86-BWON-NEXT: retl -; -; X86-BWOFF-LABEL: test1: -; X86-BWOFF: ## %bb.0: ## %entry -; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BWOFF-NEXT: movb %al, (%ecx) -; X86-BWOFF-NEXT: retl +; X86-LABEL: test1: +; X86: ## %bb.0: ## %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %al, (%ecx) +; X86-NEXT: retl entry: %A = load i32, ptr %a0, align 4 %B = and i32 %A, -256 ; 0xFFFFFF00 @@ -41,19 +34,12 @@ define void @test2(ptr nocapture %a0, i8 zeroext %a1) nounwind ssp { ; X64-NEXT: movb %sil, 1(%rdi) ; X64-NEXT: retq ; -; X86-BWON-LABEL: test2: -; X86-BWON: ## %bb.0: ## %entry -; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BWON-NEXT: movb %al, 1(%ecx) -; X86-BWON-NEXT: retl -; -; X86-BWOFF-LABEL: test2: -; X86-BWOFF: ## %bb.0: ## %entry -; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BWOFF-NEXT: movb %al, 1(%ecx) -; X86-BWOFF-NEXT: retl +; X86-LABEL: test2: +; X86: ## %bb.0: ## %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %al, 1(%ecx) +; X86-NEXT: retl entry: %A = load i32, ptr %a0, align 4 %B = and i32 %A, -65281 ; 0xFFFF00FF @@ -156,19 +142,12 @@ define void @test6(ptr nocapture %a0, i8 zeroext %a1) nounwind ssp { ; X64-NEXT: movb %sil, 5(%rdi) ; X64-NEXT: retq ; -; X86-BWON-LABEL: test6: -; X86-BWON: ## %bb.0: ## %entry -; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BWON-NEXT: movb %al, 5(%ecx) -; X86-BWON-NEXT: retl -; -; X86-BWOFF-LABEL: test6: -; X86-BWOFF: ## %bb.0: ## %entry -; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BWOFF-NEXT: movb %al, 5(%ecx) -; X86-BWOFF-NEXT: retl +; X86-LABEL: test6: +; X86: ## %bb.0: ## %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb %al, 5(%ecx) +; X86-NEXT: retl entry: %A = load i64, ptr %a0, align 4 %B = and i64 %A, -280375465082881 ; 0xFFFF00FFFFFFFFFF @@ -186,23 +165,14 @@ define i32 @test7(ptr nocapture %a0, i8 zeroext %a1, ptr %P2) nounwind { ; X64-NEXT: movb %sil, 5(%rdi) ; X64-NEXT: retq ; -; X86-BWON-LABEL: test7: -; X86-BWON: ## %bb.0: ## %entry -; X86-BWON-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BWON-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BWON-NEXT: movl (%eax), %eax -; X86-BWON-NEXT: movb %cl, 5(%edx) -; X86-BWON-NEXT: retl -; -; X86-BWOFF-LABEL: test7: -; X86-BWOFF: ## %bb.0: ## %entry -; X86-BWOFF-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-BWOFF-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BWOFF-NEXT: movl (%eax), %eax -; X86-BWOFF-NEXT: movb %cl, 5(%edx) -; X86-BWOFF-NEXT: retl +; X86-LABEL: test7: +; X86: ## %bb.0: ## %entry +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl (%eax), %eax +; X86-NEXT: movb %cl, 5(%edx) +; X86-NEXT: retl entry: %OtherLoad = load i32 , ptr%P2 %A = load i64, ptr %a0, align 4 diff --git a/llvm/test/CodeGen/X86/sttni.ll b/llvm/test/CodeGen/X86/sttni.ll index 7d1a6171c844a9..83055fc3eebe97 100644 --- a/llvm/test/CodeGen/X86/sttni.ll +++ b/llvm/test/CodeGen/X86/sttni.ll @@ -70,7 +70,7 @@ define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, ; X86-NEXT: .LBB2_2: # %compare ; X86-NEXT: movdqa %xmm0, (%esp) ; X86-NEXT: andl $15, %ecx -; X86-NEXT: movzbl (%esp,%ecx), %eax +; X86-NEXT: movb (%esp,%ecx), %al ; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) ; X86-NEXT: subb 16(%esp,%ecx), %al ; X86-NEXT: .LBB2_3: # %exit @@ -94,7 +94,7 @@ define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, ; X64-NEXT: .LBB2_2: # %compare ; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl $15, %ecx -; X64-NEXT: movzbl -24(%rsp,%rcx), %eax +; X64-NEXT: movb -24(%rsp,%rcx), %al ; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) ; X64-NEXT: subb -40(%rsp,%rcx), %al ; X64-NEXT: movzbl %al, %eax @@ -200,7 +200,7 @@ define i32 @pcmpestri_mem_diff_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 ; X86-NEXT: .LBB5_2: # %compare ; X86-NEXT: movdqa %xmm1, (%esp) ; X86-NEXT: andl $15, %ecx -; X86-NEXT: movzbl (%esp,%ecx), %eax +; X86-NEXT: movb (%esp,%ecx), %al ; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: subb 16(%esp,%ecx), %al ; X86-NEXT: .LBB5_3: # %exit @@ -227,7 +227,7 @@ define i32 @pcmpestri_mem_diff_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 ; X64-NEXT: .LBB5_2: # %compare ; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl $15, %ecx -; X64-NEXT: movzbl -24(%rsp,%rcx), %eax +; X64-NEXT: movb -24(%rsp,%rcx), %al ; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: subb -40(%rsp,%rcx), %al ; X64-NEXT: movzbl %al, %eax @@ -559,7 +559,7 @@ define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind { ; X86-NEXT: subl $48, %esp ; X86-NEXT: movdqa %xmm0, (%esp) ; X86-NEXT: andl $15, %ecx -; X86-NEXT: movzbl (%esp,%ecx), %eax +; X86-NEXT: movb (%esp,%ecx), %al ; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) ; X86-NEXT: subb 16(%esp,%ecx), %al ; X86-NEXT: movl %ebp, %esp @@ -580,7 +580,7 @@ define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind { ; X64-NEXT: .LBB14_2: # %compare ; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl $15, %ecx -; X64-NEXT: movzbl -24(%rsp,%rcx), %eax +; X64-NEXT: movb -24(%rsp,%rcx), %al ; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) ; X64-NEXT: subb -40(%rsp,%rcx), %al ; X64-NEXT: movzbl %al, %eax @@ -669,7 +669,7 @@ define i32 @pcmpistri_mem_diff_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind { ; X86-NEXT: .LBB17_2: # %compare ; X86-NEXT: movdqa %xmm1, (%esp) ; X86-NEXT: andl $15, %ecx -; X86-NEXT: movzbl (%esp,%ecx), %eax +; X86-NEXT: movb (%esp,%ecx), %al ; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) ; X86-NEXT: subb 16(%esp,%ecx), %al ; X86-NEXT: .LBB17_3: # %exit @@ -693,7 +693,7 @@ define i32 @pcmpistri_mem_diff_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind { ; X64-NEXT: .LBB17_2: # %compare ; X64-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) ; X64-NEXT: andl $15, %ecx -; X64-NEXT: movzbl -24(%rsp,%rcx), %eax +; X64-NEXT: movb -24(%rsp,%rcx), %al ; X64-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: subb -40(%rsp,%rcx), %al ; X64-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/sub-of-not.ll b/llvm/test/CodeGen/X86/sub-of-not.ll index 2b8f6c18ff086f..08e99122ed9b76 100644 --- a/llvm/test/CodeGen/X86/sub-of-not.ll +++ b/llvm/test/CodeGen/X86/sub-of-not.ll @@ -12,7 +12,7 @@ define i8 @scalar_i8(i8 %x, i8 %y) nounwind { ; X86-LABEL: scalar_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: incb %al ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/swifterror.ll b/llvm/test/CodeGen/X86/swifterror.ll index 88b81f12f4d67f..c3ecd05c0fded9 100644 --- a/llvm/test/CodeGen/X86/swifterror.ll +++ b/llvm/test/CodeGen/X86/swifterror.ll @@ -79,7 +79,7 @@ define float @caller(ptr %error_ref) { ; CHECK-APPLE-NEXT: testq %r12, %r12 ; CHECK-APPLE-NEXT: jne LBB1_2 ; CHECK-APPLE-NEXT: ## %bb.1: ## %cont -; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax +; CHECK-APPLE-NEXT: movb 8(%rdi), %al ; CHECK-APPLE-NEXT: movb %al, (%rbx) ; CHECK-APPLE-NEXT: LBB1_2: ## %handler ; CHECK-APPLE-NEXT: callq _free @@ -131,7 +131,7 @@ define float @caller(ptr %error_ref) { ; CHECK-i386-NEXT: jne LBB1_2 ; CHECK-i386-NEXT: ## %bb.1: ## %cont ; CHECK-i386-NEXT: movl 16(%esp), %ecx -; CHECK-i386-NEXT: movzbl 8(%eax), %edx +; CHECK-i386-NEXT: movb 8(%eax), %dl ; CHECK-i386-NEXT: movb %dl, (%ecx) ; CHECK-i386-NEXT: LBB1_2: ## %handler ; CHECK-i386-NEXT: movl %eax, (%esp) @@ -182,7 +182,7 @@ define float @caller2(ptr %error_ref) { ; CHECK-APPLE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-APPLE-NEXT: jbe LBB2_1 ; CHECK-APPLE-NEXT: ## %bb.3: ## %bb_end -; CHECK-APPLE-NEXT: movzbl 8(%r12), %eax +; CHECK-APPLE-NEXT: movb 8(%r12), %al ; CHECK-APPLE-NEXT: movb %al, (%rbx) ; CHECK-APPLE-NEXT: LBB2_4: ## %handler ; CHECK-APPLE-NEXT: movq %r12, %rdi @@ -263,7 +263,7 @@ define float @caller2(ptr %error_ref) { ; CHECK-i386-NEXT: sahf ; CHECK-i386-NEXT: jbe LBB2_1 ; CHECK-i386-NEXT: ## %bb.3: ## %bb_end -; CHECK-i386-NEXT: movzbl 8(%ecx), %eax +; CHECK-i386-NEXT: movb 8(%ecx), %al ; CHECK-i386-NEXT: movb %al, (%esi) ; CHECK-i386-NEXT: fldz ; CHECK-i386-NEXT: LBB2_4: ## %handler @@ -632,7 +632,7 @@ define float @caller3(ptr %error_ref) { ; CHECK-APPLE-NEXT: testq %r12, %r12 ; CHECK-APPLE-NEXT: jne LBB6_2 ; CHECK-APPLE-NEXT: ## %bb.1: ## %cont -; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax +; CHECK-APPLE-NEXT: movb 8(%rdi), %al ; CHECK-APPLE-NEXT: movb %al, (%rbx) ; CHECK-APPLE-NEXT: LBB6_2: ## %handler ; CHECK-APPLE-NEXT: callq _free @@ -689,7 +689,7 @@ define float @caller3(ptr %error_ref) { ; CHECK-i386-NEXT: jne LBB6_2 ; CHECK-i386-NEXT: ## %bb.1: ## %cont ; CHECK-i386-NEXT: movl 48(%esp), %ecx -; CHECK-i386-NEXT: movzbl 8(%eax), %edx +; CHECK-i386-NEXT: movb 8(%eax), %dl ; CHECK-i386-NEXT: movb %dl, (%ecx) ; CHECK-i386-NEXT: LBB6_2: ## %handler ; CHECK-i386-NEXT: movl %eax, (%esp) @@ -744,7 +744,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_ ; CHECK-APPLE-NEXT: testq %r12, %r12 ; CHECK-APPLE-NEXT: jne LBB7_2 ; CHECK-APPLE-NEXT: ## %bb.1: ## %cont -; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax +; CHECK-APPLE-NEXT: movb 8(%rdi), %al ; CHECK-APPLE-NEXT: movb %al, (%rbx) ; CHECK-APPLE-NEXT: LBB7_2: ## %handler ; CHECK-APPLE-NEXT: callq _free @@ -757,7 +757,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_ ; CHECK-APPLE-NEXT: testq %r12, %r12 ; CHECK-APPLE-NEXT: jne LBB7_4 ; CHECK-APPLE-NEXT: ## %bb.3: ## %cont2 -; CHECK-APPLE-NEXT: movzbl 8(%rdi), %eax +; CHECK-APPLE-NEXT: movb 8(%rdi), %al ; CHECK-APPLE-NEXT: movb %al, (%r14) ; CHECK-APPLE-NEXT: LBB7_4: ## %handler2 ; CHECK-APPLE-NEXT: callq _free @@ -842,7 +842,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_ ; CHECK-i386-NEXT: jne LBB7_2 ; CHECK-i386-NEXT: ## %bb.1: ## %cont ; CHECK-i386-NEXT: movl 8(%ebp), %ecx -; CHECK-i386-NEXT: movzbl 8(%eax), %edx +; CHECK-i386-NEXT: movb 8(%eax), %dl ; CHECK-i386-NEXT: movb %dl, (%ecx) ; CHECK-i386-NEXT: LBB7_2: ## %handler ; CHECK-i386-NEXT: subl $12, %esp @@ -863,7 +863,7 @@ define float @caller_with_multiple_swifterror_values(ptr %error_ref, ptr %error_ ; CHECK-i386-NEXT: jne LBB7_4 ; CHECK-i386-NEXT: ## %bb.3: ## %cont2 ; CHECK-i386-NEXT: movl 12(%ebp), %ecx -; CHECK-i386-NEXT: movzbl 8(%eax), %edx +; CHECK-i386-NEXT: movb 8(%eax), %dl ; CHECK-i386-NEXT: movb %dl, (%ecx) ; CHECK-i386-NEXT: LBB7_4: ## %handler2 ; CHECK-i386-NEXT: subl $12, %esp diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll index 1548e2df42bedb..71708e7ceb46f6 100644 --- a/llvm/test/CodeGen/X86/tail-opts.ll +++ b/llvm/test/CodeGen/X86/tail-opts.ll @@ -244,7 +244,7 @@ define fastcc void @c_expand_expr_stmt(ptr %expr) nounwind { ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB3_9 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: movzbl 0, %ebx +; CHECK-NEXT: movb 0, %bl ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: jne .LBB3_8 diff --git a/llvm/test/CodeGen/X86/tls.ll b/llvm/test/CodeGen/X86/tls.ll index c4ee609148af31..308d7c5b1b82f6 100644 --- a/llvm/test/CodeGen/X86/tls.ll +++ b/llvm/test/CodeGen/X86/tls.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck -check-prefix=X86_LINUX %s ; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s ; RUN: llc < %s -mtriple=i386-linux-gnu -fast-isel | FileCheck -check-prefix=X86_ISEL_LINUX %s @@ -20,48 +19,29 @@ define dso_local i32 @f1() { ; X86_LINUX-LABEL: f1: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:i1@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:i1@NTPOFF, %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f1: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movl %fs:i1@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f1: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:i1@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f1: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movl %fs:i1@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movl %fs:i1@TPOFF, %eax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f1: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movl _i1@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: movl _i1@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f1: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f1: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f1: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: %tmp1 = load i32, ptr @i1 @@ -70,52 +50,31 @@ entry: define dso_local ptr @f2() { ; X86_LINUX-LABEL: f2: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f2: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f2: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f2: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f2: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: leal _i1@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: leal _i1@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f2: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f2: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: leal _i1@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: leaq i1@SECREL32(%rax), %rax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f2: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: leal _i1@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: ret ptr @i1 @@ -123,52 +82,31 @@ entry: define dso_local i32 @f3() nounwind { ; X86_LINUX-LABEL: f3: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl i2@INDNTPOFF, %eax -; X86_LINUX-NEXT: movl %gs:(%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl i2@INDNTPOFF, %eax +; X86_LINUX-NEXT: movl %gs:(%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f3: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax -; X64_LINUX-NEXT: movl %fs:(%rax), %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f3: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl i2@INDNTPOFF, %eax -; X86_ISEL_LINUX-NEXT: movl %gs:(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f3: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax -; X64_ISEL_LINUX-NEXT: movl %fs:(%rax), %eax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movq i2@GOTTPOFF(%rip), %rax +; X64_LINUX-NEXT: movl %fs:(%rax), %eax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f3: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movl _i2@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: movl _i2@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f3: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f3: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f3: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: %tmp1 = load i32, ptr @i2 @@ -177,52 +115,31 @@ entry: define dso_local ptr @f4() { ; X86_LINUX-LABEL: f4: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: addl i2@INDNTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: addl i2@INDNTPOFF, %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f4: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f4: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: addl i2@INDNTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f4: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f4: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: leal _i2@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: leal _i2@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f4: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f4: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: leal _i2@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: leaq i2@SECREL32(%rax), %rax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f4: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: leal _i2@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: ret ptr @i2 @@ -230,48 +147,29 @@ entry: define dso_local i32 @f5() nounwind { ; X86_LINUX-LABEL: f5: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:i3@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:i3@NTPOFF, %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f5: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movl %fs:i3@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f5: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:i3@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f5: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movl %fs:i3@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movl %fs:i3@TPOFF, %eax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f5: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movl _i3@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: movl _i3@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f5: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f5: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movl _i3@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: movl i3@SECREL32(%rax), %eax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f5: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movl _i3@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: %tmp1 = load i32, ptr @i3 @@ -280,52 +178,31 @@ entry: define dso_local ptr @f6() { ; X86_LINUX-LABEL: f6: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f6: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f6: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f6: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f6: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: leal _i3@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: leal _i3@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f6: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f6: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: leal _i3@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: leaq i3@SECREL32(%rax), %rax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f6: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: leal _i3@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: ret ptr @i3 @@ -333,48 +210,17 @@ entry: define dso_local i32 @f7() { ; X86_LINUX-LABEL: f7: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:i4@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:i4@NTPOFF, %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f7: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movl %fs:i4@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f7: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:i4@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f7: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movl %fs:i4@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; -; X86_WIN-LABEL: f7: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movl _i4@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; -; X64_WIN-LABEL: f7: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i4@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f7: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movl _i4@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_LINUX: movl %fs:i4@TPOFF, %eax +; X64_LINUX-NEXT: ret +; MINGW32-LABEL: _f7: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movl _i4@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: %tmp1 = load i32, ptr @i4 @@ -383,52 +229,19 @@ entry: define dso_local ptr @f8() { ; X86_LINUX-LABEL: f8: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f8: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f8: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f8: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax -; X64_ISEL_LINUX-NEXT: retq -; -; X86_WIN-LABEL: f8: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: leal _i4@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; -; X64_WIN-LABEL: f8: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i4@SECREL32(%rax), %rax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f8: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: leal _i4@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax +; X64_LINUX-NEXT: ret +; MINGW32-LABEL: _f8: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: leal _i4@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: ret ptr @i4 @@ -436,48 +249,17 @@ entry: define dso_local i32 @f9() { ; X86_LINUX-LABEL: f9: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:i5@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:i5@NTPOFF, %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f9: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movl %fs:i5@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f9: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:i5@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f9: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movl %fs:i5@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; -; X86_WIN-LABEL: f9: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movl _i5@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; -; X64_WIN-LABEL: f9: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i5@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f9: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movl _i5@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_LINUX: movl %fs:i5@TPOFF, %eax +; X64_LINUX-NEXT: ret +; MINGW32-LABEL: _f9: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movl _i5@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: %tmp1 = load i32, ptr @i5 @@ -486,52 +268,19 @@ entry: define dso_local ptr @f10() { ; X86_LINUX-LABEL: f10: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f10: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f10: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f10: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax -; X64_ISEL_LINUX-NEXT: retq -; -; X86_WIN-LABEL: f10: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: leal _i5@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; -; X64_WIN-LABEL: f10: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i5@SECREL32(%rax), %rax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f10: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: leal _i5@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax +; X64_LINUX-NEXT: ret +; MINGW32-LABEL: _f10: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: leal _i5@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: ret ptr @i5 @@ -539,48 +288,29 @@ entry: define i16 @f11() { ; X86_LINUX-LABEL: f11: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movzwl %gs:s1@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movzwl %gs:s1@NTPOFF, %eax +; X86_LINUX: ret ; X64_LINUX-LABEL: f11: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movzwl %fs:s1@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f11: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movzwl %gs:s1@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f11: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movzwl %fs:s1@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movzwl %fs:s1@TPOFF, %eax +; X64_LINUX: ret ; X86_WIN-LABEL: f11: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: movzwl _s1@SECREL32(%eax), %eax +; X86_WIN: ret ; X64_WIN-LABEL: f11: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f11: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movzwl _s1@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: movzwl s1@SECREL32(%rax), %eax +; X64_WIN: ret +; MINGW32-LABEL: _f11: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movzwl _s1@SECREL32(%eax), %eax +; MINGW32: retl entry: %tmp1 = load i16, ptr @s1 @@ -589,48 +319,29 @@ entry: define dso_local i32 @f12() { ; X86_LINUX-LABEL: f12: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movswl %gs:s1@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movswl %gs:s1@NTPOFF, %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f12: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movswl %fs:s1@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f12: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movswl %gs:s1@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f12: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movswl %fs:s1@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movswl %fs:s1@TPOFF, %eax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f12: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: movswl _s1@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f12: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f12: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movswl _s1@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: movswl s1@SECREL32(%rax), %eax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f12: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movswl _s1@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: @@ -641,48 +352,29 @@ entry: define dso_local i8 @f13() { ; X86_LINUX-LABEL: f13: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movzbl %gs:b1@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movb %gs:b1@NTPOFF, %al +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f13: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movzbl %fs:b1@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f13: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movzbl %gs:b1@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f13: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movzbl %fs:b1@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movb %fs:b1@TPOFF, %al +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f13: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movzbl _b1@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: movb _b1@SECREL32(%eax), %al +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f13: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movzbl b1@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f13: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movzbl _b1@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: movb b1@SECREL32(%rax), %al +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f13: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movb _b1@SECREL32(%eax), %al +; MINGW32-NEXT: retl entry: %tmp1 = load i8, ptr @b1 @@ -691,48 +383,29 @@ entry: define dso_local i32 @f14() { ; X86_LINUX-LABEL: f14: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movsbl %gs:b1@NTPOFF, %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movsbl %gs:b1@NTPOFF, %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f14: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movsbl %fs:b1@TPOFF, %eax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f14: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movsbl %gs:b1@NTPOFF, %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f14: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movsbl %fs:b1@TPOFF, %eax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movsbl %fs:b1@TPOFF, %eax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f14: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl __tls_index, %eax +; X86_WIN-NEXT: movl %fs:__tls_array, %ecx +; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax +; X86_WIN-NEXT: movsbl _b1@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f14: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f14: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movsbl _b1@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_WIN: movl _tls_index(%rip), %eax +; X64_WIN-NEXT: movq %gs:88, %rcx +; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax +; X64_WIN-NEXT: movsbl b1@SECREL32(%rax), %eax +; X64_WIN-NEXT: ret +; MINGW32-LABEL: _f14: +; MINGW32: movl __tls_index, %eax +; MINGW32-NEXT: movl %fs:44, %ecx +; MINGW32-NEXT: movl (%ecx,%eax,4), %eax +; MINGW32-NEXT: movsbl _b1@SECREL32(%eax), %eax +; MINGW32-NEXT: retl entry: %tmp1 = load i8, ptr @b1 @@ -742,49 +415,28 @@ entry: define dso_local ptr @f15() { ; X86_LINUX-LABEL: f15: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f15: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f15: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: leal b2@NTPOFF(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f15: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax -; X64_ISEL_LINUX-NEXT: retq -; +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: leaq b2@TPOFF(%rax), %rax +; X64_LINUX-NEXT: ret ; X86_WIN-LABEL: f15: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl %fs:__tls_array, %eax -; X86_WIN-NEXT: movl (%eax), %eax -; X86_WIN-NEXT: leal _b2@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; +; X86_WIN: movl %fs:__tls_array, %eax +; X86_WIN-NEXT: movl (%eax), %eax +; X86_WIN-NEXT: leal _b2@SECREL32(%eax), %eax +; X86_WIN-NEXT: ret ; X64_WIN-LABEL: f15: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movq %gs:88, %rax -; X64_WIN-NEXT: movq (%rax), %rax -; X64_WIN-NEXT: leaq b2@SECREL32(%rax), %rax -; X64_WIN-NEXT: retq -; +; X64_WIN: movq %gs:88, %rax +; X64_WIN-NEXT: movq (%rax), %rax +; X64_WIN-NEXT: leaq b2@SECREL32(%rax), %rax +; X64_WIN-NEXT: ret ; MINGW32-LABEL: f15: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl %fs:44, %eax -; MINGW32-NEXT: movl (%eax), %eax -; MINGW32-NEXT: leal _b2@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; MINGW32: movl %fs:44, %eax +; MINGW32-NEXT: movl (%eax), %eax +; MINGW32-NEXT: leal _b2@SECREL32(%eax), %eax +; MINGW32-NEXT: ret entry: ret ptr @b2 } @@ -792,53 +444,14 @@ entry: define dso_local ptr @f16() { ; X86_LINUX-LABEL: f16: -; X86_LINUX: # %bb.0: -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: leal i6@NTPOFF(%eax), %eax -; X86_LINUX-NEXT: retl -; -; X64_LINUX-LABEL: f16: -; X64_LINUX: # %bb.0: -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: leaq i6@TPOFF(%rax), %rax -; X64_LINUX-NEXT: retq -; -; X86_ISEL_LINUX-LABEL: f16: -; X86_ISEL_LINUX: # %bb.0: -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: leal i6@NTPOFF(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; -; X64_ISEL_LINUX-LABEL: f16: -; X64_ISEL_LINUX: # %bb.0: -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: leaq i6@TPOFF(%rax), %rax -; X64_ISEL_LINUX-NEXT: retq -; -; X86_WIN-LABEL: f16: -; X86_WIN: # %bb.0: -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: leal _i6@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; -; X64_WIN-LABEL: f16: -; X64_WIN: # %bb.0: -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: leaq i6@SECREL32(%rax), %rax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f16: -; MINGW32: # %bb.0: -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: leal _i6@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: leal i6@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: ret +; X64_LINUX-LABEL: f16: +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: leaq i6@TPOFF(%rax), %rax +; X64_LINUX-NEXT: ret ret ptr @i6 } @@ -846,52 +459,21 @@ define dso_local ptr @f16() { ; NOTE: Similar to f1() but with direct TLS segment access disabled define dso_local i32 @f17() #0 { ; X86_LINUX-LABEL: f17: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl %gs:0, %eax -; X86_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl %gs:0, %eax +; X86_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f17: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq %fs:0, %rax -; X64_LINUX-NEXT: movl i1@TPOFF(%rax), %eax -; X64_LINUX-NEXT: retq -; +; X64_LINUX: movq %fs:0, %rax +; X64_LINUX-NEXT: movl i1@TPOFF(%rax), %eax +; X64_LINUX-NEXT: ret ; X86_ISEL_LINUX-LABEL: f17: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl %gs:0, %eax -; X86_ISEL_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; +; X86_ISEL_LINUX: movl %gs:0, %eax +; X86_ISEL_LINUX-NEXT: movl i1@NTPOFF(%eax), %eax +; X86_ISEL_LINUX-NEXT: ret ; X64_ISEL_LINUX-LABEL: f17: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rax -; X64_ISEL_LINUX-NEXT: movl i1@TPOFF(%rax), %eax -; X64_ISEL_LINUX-NEXT: retq -; -; X86_WIN-LABEL: f17: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movl _i1@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; -; X64_WIN-LABEL: f17: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i1@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f17: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movl _i1@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_ISEL_LINUX: movq %fs:0, %rax +; X64_ISEL_LINUX-NEXT: movl i1@TPOFF(%rax), %eax +; X64_ISEL_LINUX-NEXT: ret entry: %tmp1 = load i32, ptr @i1 @@ -901,56 +483,25 @@ entry: ; NOTE: Similar to f3() but with direct TLS segment access disabled define dso_local i32 @f18() #1 { ; X86_LINUX-LABEL: f18: -; X86_LINUX: # %bb.0: # %entry -; X86_LINUX-NEXT: movl i2@INDNTPOFF, %eax -; X86_LINUX-NEXT: movl %gs:0, %ecx -; X86_LINUX-NEXT: movl (%ecx,%eax), %eax -; X86_LINUX-NEXT: retl -; +; X86_LINUX: movl i2@INDNTPOFF, %eax +; X86_LINUX-NEXT: movl %gs:0, %ecx +; X86_LINUX-NEXT: movl (%ecx,%eax), %eax +; X86_LINUX-NEXT: ret ; X64_LINUX-LABEL: f18: -; X64_LINUX: # %bb.0: # %entry -; X64_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax -; X64_LINUX-NEXT: movq %fs:0, %rcx -; X64_LINUX-NEXT: movl (%rcx,%rax), %eax -; X64_LINUX-NEXT: retq -; +; X64_LINUX: movq i2@GOTTPOFF(%rip), %rax +; X64_LINUX-NEXT: movq %fs:0, %rcx +; X64_LINUX-NEXT: movl (%rcx,%rax), %eax +; X64_LINUX-NEXT: ret ; X86_ISEL_LINUX-LABEL: f18: -; X86_ISEL_LINUX: # %bb.0: # %entry -; X86_ISEL_LINUX-NEXT: movl i2@INDNTPOFF, %eax -; X86_ISEL_LINUX-NEXT: movl %gs:0, %ecx -; X86_ISEL_LINUX-NEXT: movl (%ecx,%eax), %eax -; X86_ISEL_LINUX-NEXT: retl -; +; X86_ISEL_LINUX: movl i2@INDNTPOFF, %eax +; X86_ISEL_LINUX-NEXT: movl %gs:0, %ecx +; X86_ISEL_LINUX-NEXT: movl (%ecx,%eax), %eax +; X86_ISEL_LINUX-NEXT: ret ; X64_ISEL_LINUX-LABEL: f18: -; X64_ISEL_LINUX: # %bb.0: # %entry -; X64_ISEL_LINUX-NEXT: movq i2@GOTTPOFF(%rip), %rax -; X64_ISEL_LINUX-NEXT: movq %fs:0, %rcx -; X64_ISEL_LINUX-NEXT: movl (%rcx,%rax), %eax -; X64_ISEL_LINUX-NEXT: retq -; -; X86_WIN-LABEL: f18: -; X86_WIN: # %bb.0: # %entry -; X86_WIN-NEXT: movl __tls_index, %eax -; X86_WIN-NEXT: movl %fs:__tls_array, %ecx -; X86_WIN-NEXT: movl (%ecx,%eax,4), %eax -; X86_WIN-NEXT: movl _i2@SECREL32(%eax), %eax -; X86_WIN-NEXT: retl -; -; X64_WIN-LABEL: f18: -; X64_WIN: # %bb.0: # %entry -; X64_WIN-NEXT: movl _tls_index(%rip), %eax -; X64_WIN-NEXT: movq %gs:88, %rcx -; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax -; X64_WIN-NEXT: movl i2@SECREL32(%rax), %eax -; X64_WIN-NEXT: retq -; -; MINGW32-LABEL: f18: -; MINGW32: # %bb.0: # %entry -; MINGW32-NEXT: movl __tls_index, %eax -; MINGW32-NEXT: movl %fs:44, %ecx -; MINGW32-NEXT: movl (%ecx,%eax,4), %eax -; MINGW32-NEXT: movl _i2@SECREL32(%eax), %eax -; MINGW32-NEXT: retl +; X64_ISEL_LINUX: movq i2@GOTTPOFF(%rip), %rax +; X64_ISEL_LINUX-NEXT: movq %fs:0, %rcx +; X64_ISEL_LINUX-NEXT: movl (%rcx,%rax), %eax +; X64_ISEL_LINUX-NEXT: ret entry: diff --git a/llvm/test/CodeGen/X86/trunc-to-bool.ll b/llvm/test/CodeGen/X86/trunc-to-bool.ll index 5a5d057597465d..b0d656db34eec2 100644 --- a/llvm/test/CodeGen/X86/trunc-to-bool.ll +++ b/llvm/test/CodeGen/X86/trunc-to-bool.ll @@ -7,7 +7,7 @@ define zeroext i1 @test1(i32 %X) nounwind { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al ; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: retl %Y = trunc i32 %X to i1 diff --git a/llvm/test/CodeGen/X86/uadd_sat.ll b/llvm/test/CodeGen/X86/uadd_sat.ll index 0a3c2ae344fd32..cbecdefbec2608 100644 --- a/llvm/test/CodeGen/X86/uadd_sat.ll +++ b/llvm/test/CodeGen/X86/uadd_sat.ll @@ -74,7 +74,7 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind { define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %ecx ; X86-NEXT: movl $255, %eax @@ -97,7 +97,7 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind { define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %ecx ; X86-NEXT: cmpb $15, %al diff --git a/llvm/test/CodeGen/X86/uadd_sat_plus.ll b/llvm/test/CodeGen/X86/uadd_sat_plus.ll index 654e3d77f52aa4..82766af1daad00 100644 --- a/llvm/test/CodeGen/X86/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/X86/uadd_sat_plus.ll @@ -80,7 +80,7 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) ; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %ecx @@ -108,7 +108,7 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) ; X86-NEXT: andb $15, %al ; X86-NEXT: addb {{[0-9]+}}(%esp), %al diff --git a/llvm/test/CodeGen/X86/uadd_sat_vec.ll b/llvm/test/CodeGen/X86/uadd_sat_vec.ll index 1286f2da6405a4..836f52439e4c09 100644 --- a/llvm/test/CodeGen/X86/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/uadd_sat_vec.ll @@ -429,7 +429,7 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; SSE-LABEL: v1i8: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: addb (%rsi), %al ; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: movl $255, %ecx @@ -439,7 +439,7 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; ; AVX-LABEL: v1i8: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %eax +; AVX-NEXT: movb (%rdi), %al ; AVX-NEXT: addb (%rsi), %al ; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: movl $255, %ecx diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll index 8d3319eb595883..eba1fd1565e1b3 100644 --- a/llvm/test/CodeGen/X86/udiv_fix.ll +++ b/llvm/test/CodeGen/X86/udiv_fix.ll @@ -122,9 +122,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $15, %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll index ce3f635ab13460..33d0bd7c4f90fa 100644 --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -150,9 +150,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $15, %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/umul_fix.ll b/llvm/test/CodeGen/X86/umul_fix.ll index cb4bdd1ede75c9..fce98cc4486459 100644 --- a/llvm/test/CodeGen/X86/umul_fix.ll +++ b/llvm/test/CodeGen/X86/umul_fix.ll @@ -85,9 +85,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $15, %al -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: mulb %cl ; X86-NEXT: shrb $2, %al @@ -206,9 +206,9 @@ define i4 @func6(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $15, %al -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl ; X86-NEXT: mulb %cl ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/umul_fix_sat.ll b/llvm/test/CodeGen/X86/umul_fix_sat.ll index 36e3749654f905..247b5ee17e7a55 100644 --- a/llvm/test/CodeGen/X86/umul_fix_sat.ll +++ b/llvm/test/CodeGen/X86/umul_fix_sat.ll @@ -113,9 +113,9 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: andb $15, %al -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movzbl %al, %edx ; X86-NEXT: shlb $4, %cl ; X86-NEXT: movzbl %cl, %eax @@ -335,9 +335,9 @@ define i4 @func6(i4 %x, i4 %y) nounwind { ; ; X86-LABEL: func6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: mulb %cl ; X86-NEXT: movzbl %al, %ecx diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll index d0deed539bc521..3d7544f7f68142 100644 --- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll @@ -126,7 +126,7 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; X86-NEXT: testl %edi, %edi ; X86-NEXT: setne %bh ; X86-NEXT: andb %cl, %bh -; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload ; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload ; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll index a6ca83a3a2af5f..2845fc1e530802 100644 --- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask.ll @@ -108,9 +108,9 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v4i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorb %r9b, %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %r9b, %sil @@ -132,9 +132,9 @@ define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-SSE1-LABEL: out_v4i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorb %r9b, %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %r9b, %sil @@ -175,8 +175,8 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi ; CHECK-BASELINE-LABEL: out_v4i8_undef: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %r9b, %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil @@ -196,8 +196,8 @@ define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwi ; CHECK-SSE1-LABEL: out_v4i8_undef: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %r9b, %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil @@ -309,14 +309,14 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movq %rdi, %rax -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %bl, %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %bl, %sil @@ -332,15 +332,15 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-BASELINE-NEXT: xorb %bpl, %r9b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-BASELINE-NEXT: xorb %bpl, %r9b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: xorb %r11b, %bpl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: xorb %r11b, %bpl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorb %r10b, %r11b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorb %r10b, %r11b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %dil, %bl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %dil, %bl @@ -367,14 +367,14 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %bl, %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %bl, %sil @@ -390,15 +390,15 @@ define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-SSE1-NEXT: xorb %bpl, %r9b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-SSE1-NEXT: xorb %bpl, %r9b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: xorb %r11b, %bpl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: xorb %r11b, %bpl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorb %r10b, %r11b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorb %r10b, %r11b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %dil, %bl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %dil, %bl @@ -635,15 +635,15 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movl %edx, %r11d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %bl, %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %bl, %sil @@ -662,54 +662,54 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin ; CHECK-BASELINE-NEXT: xorb %r12b, %r9b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-BASELINE-NEXT: xorb %r12b, %r9b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-BASELINE-NEXT: xorb %bpl, %r12b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b ; CHECK-BASELINE-NEXT: xorb %bpl, %r12b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: xorb %r14b, %bpl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: xorb %r14b, %bpl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %r15b, %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %r15b, %sil -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edx +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dl ; CHECK-BASELINE-NEXT: xorb %r13b, %dl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-BASELINE-NEXT: xorb %r13b, %dl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %al, %r13b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: xorb %al, %r13b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b ; CHECK-BASELINE-NEXT: xorb %al, %r15b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %al, %bl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %al, %bl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: xorb %r8b, %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %r8b, %al -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: xorb %r8b, %r10b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: xorb %r8b, %r10b @@ -750,15 +750,15 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movl %edx, %r11d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %bl, %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %bl, %sil @@ -777,54 +777,54 @@ define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwin ; CHECK-SSE1-NEXT: xorb %r12b, %r9b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-SSE1-NEXT: xorb %r12b, %r9b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-SSE1-NEXT: xorb %bpl, %r12b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b ; CHECK-SSE1-NEXT: xorb %bpl, %r12b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: xorb %r14b, %bpl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: xorb %r14b, %bpl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %r15b, %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %r15b, %sil -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dl ; CHECK-SSE1-NEXT: xorb %r13b, %dl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-SSE1-NEXT: xorb %r13b, %dl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %al, %r13b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: xorb %al, %r13b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b ; CHECK-SSE1-NEXT: xorb %al, %r15b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %al, %bl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %al, %bl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: xorb %r8b, %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %r8b, %al -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: xorb %r8b, %r10b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: xorb %r8b, %r10b @@ -1198,196 +1198,196 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-NEXT: movq %rdx, %r8 ; CHECK-BASELINE-NEXT: movq %rsi, %r9 ; CHECK-BASELINE-NEXT: movq %rdi, %r11 -; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 15(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 14(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 13(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 12(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 11(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 10(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %ebp -; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r14d -; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r15d -; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %r12d -; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %esi -; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %r13d -; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %edx -; CHECK-BASELINE-NEXT: movzbl 2(%r8), %edi -; CHECK-BASELINE-NEXT: movzbl (%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 1(%r8), %ecx -; CHECK-BASELINE-NEXT: movzbl (%r9), %ebx +; CHECK-BASELINE-NEXT: movb 9(%rdx), %bpl +; CHECK-BASELINE-NEXT: movb 8(%rdx), %r14b +; CHECK-BASELINE-NEXT: movb 7(%rdx), %r15b +; CHECK-BASELINE-NEXT: movb 6(%rdx), %r12b +; CHECK-BASELINE-NEXT: movb 5(%rdx), %sil +; CHECK-BASELINE-NEXT: movb 4(%rdx), %r13b +; CHECK-BASELINE-NEXT: movb 3(%rdx), %dl +; CHECK-BASELINE-NEXT: movb 2(%r8), %dil +; CHECK-BASELINE-NEXT: movb (%r8), %al +; CHECK-BASELINE-NEXT: movb 1(%r8), %cl +; CHECK-BASELINE-NEXT: movb (%r9), %bl ; CHECK-BASELINE-NEXT: xorb %al, %bl ; CHECK-BASELINE-NEXT: andb (%r10), %bl ; CHECK-BASELINE-NEXT: xorb %al, %bl ; CHECK-BASELINE-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 1(%r9), %eax +; CHECK-BASELINE-NEXT: movb 1(%r9), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 1(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 2(%r9), %eax +; CHECK-BASELINE-NEXT: movb 2(%r9), %al ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: andb 2(%r10), %al ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 3(%r9), %eax +; CHECK-BASELINE-NEXT: movb 3(%r9), %al ; CHECK-BASELINE-NEXT: xorb %dl, %al ; CHECK-BASELINE-NEXT: andb 3(%r10), %al ; CHECK-BASELINE-NEXT: xorb %dl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 4(%r9), %eax +; CHECK-BASELINE-NEXT: movb 4(%r9), %al ; CHECK-BASELINE-NEXT: xorb %r13b, %al ; CHECK-BASELINE-NEXT: andb 4(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r13b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 5(%r9), %eax +; CHECK-BASELINE-NEXT: movb 5(%r9), %al ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: andb 5(%r10), %al ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 6(%r9), %eax +; CHECK-BASELINE-NEXT: movb 6(%r9), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: andb 6(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 7(%r9), %eax +; CHECK-BASELINE-NEXT: movb 7(%r9), %al ; CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: andb 7(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 8(%r9), %eax +; CHECK-BASELINE-NEXT: movb 8(%r9), %al ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: andb 8(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 9(%r9), %eax +; CHECK-BASELINE-NEXT: movb 9(%r9), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: andb 9(%r10), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 10(%r9), %eax -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 10(%r9), %al +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 10(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 11(%r9), %eax -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 11(%r9), %al +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 11(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 12(%r9), %eax -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 12(%r9), %al +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 12(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 13(%r9), %eax -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 13(%r9), %al +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 13(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 14(%r9), %eax -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 14(%r9), %al +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 14(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 15(%r9), %eax -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 15(%r9), %al +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 15(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 16(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 16(%r9), %ecx +; CHECK-BASELINE-NEXT: movb 16(%r8), %al +; CHECK-BASELINE-NEXT: movb 16(%r9), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 16(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 17(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 17(%r9), %ecx +; CHECK-BASELINE-NEXT: movb 17(%r8), %al +; CHECK-BASELINE-NEXT: movb 17(%r9), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 17(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 18(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 18(%r9), %ecx +; CHECK-BASELINE-NEXT: movb 18(%r8), %al +; CHECK-BASELINE-NEXT: movb 18(%r9), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 18(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 19(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 19(%r9), %ecx +; CHECK-BASELINE-NEXT: movb 19(%r8), %al +; CHECK-BASELINE-NEXT: movb 19(%r9), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 19(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 20(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 20(%r9), %ecx +; CHECK-BASELINE-NEXT: movb 20(%r8), %al +; CHECK-BASELINE-NEXT: movb 20(%r9), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 20(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 21(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 21(%r9), %r13d +; CHECK-BASELINE-NEXT: movb 21(%r8), %al +; CHECK-BASELINE-NEXT: movb 21(%r9), %r13b ; CHECK-BASELINE-NEXT: xorb %al, %r13b ; CHECK-BASELINE-NEXT: andb 21(%r10), %r13b ; CHECK-BASELINE-NEXT: xorb %al, %r13b -; CHECK-BASELINE-NEXT: movzbl 22(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 22(%r9), %r12d +; CHECK-BASELINE-NEXT: movb 22(%r8), %al +; CHECK-BASELINE-NEXT: movb 22(%r9), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: andb 22(%r10), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b -; CHECK-BASELINE-NEXT: movzbl 23(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 23(%r9), %r15d +; CHECK-BASELINE-NEXT: movb 23(%r8), %al +; CHECK-BASELINE-NEXT: movb 23(%r9), %r15b ; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: andb 23(%r10), %r15b ; CHECK-BASELINE-NEXT: xorb %al, %r15b -; CHECK-BASELINE-NEXT: movzbl 24(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 24(%r9), %r14d +; CHECK-BASELINE-NEXT: movb 24(%r8), %al +; CHECK-BASELINE-NEXT: movb 24(%r9), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: andb 24(%r10), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b -; CHECK-BASELINE-NEXT: movzbl 25(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 25(%r9), %ebp +; CHECK-BASELINE-NEXT: movb 25(%r8), %al +; CHECK-BASELINE-NEXT: movb 25(%r9), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: andb 25(%r10), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl -; CHECK-BASELINE-NEXT: movzbl 26(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 26(%r9), %edi +; CHECK-BASELINE-NEXT: movb 26(%r8), %al +; CHECK-BASELINE-NEXT: movb 26(%r9), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: andb 26(%r10), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil -; CHECK-BASELINE-NEXT: movzbl 27(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 27(%r9), %esi +; CHECK-BASELINE-NEXT: movb 27(%r8), %al +; CHECK-BASELINE-NEXT: movb 27(%r9), %sil ; CHECK-BASELINE-NEXT: xorb %al, %sil ; CHECK-BASELINE-NEXT: andb 27(%r10), %sil ; CHECK-BASELINE-NEXT: xorb %al, %sil -; CHECK-BASELINE-NEXT: movzbl 28(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 28(%r9), %edx +; CHECK-BASELINE-NEXT: movb 28(%r8), %al +; CHECK-BASELINE-NEXT: movb 28(%r9), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 28(%r10), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: movzbl 29(%r8), %eax -; CHECK-BASELINE-NEXT: movzbl 29(%r9), %ecx +; CHECK-BASELINE-NEXT: movb 29(%r8), %al +; CHECK-BASELINE-NEXT: movb 29(%r9), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 29(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: movzbl 30(%r8), %ebx -; CHECK-BASELINE-NEXT: movzbl 30(%r9), %eax +; CHECK-BASELINE-NEXT: movb 30(%r8), %bl +; CHECK-BASELINE-NEXT: movb 30(%r9), %al ; CHECK-BASELINE-NEXT: xorb %bl, %al ; CHECK-BASELINE-NEXT: andb 30(%r10), %al ; CHECK-BASELINE-NEXT: xorb %bl, %al -; CHECK-BASELINE-NEXT: movzbl 31(%r8), %r8d -; CHECK-BASELINE-NEXT: movzbl 31(%r9), %ebx +; CHECK-BASELINE-NEXT: movb 31(%r8), %r8b +; CHECK-BASELINE-NEXT: movb 31(%r9), %bl ; CHECK-BASELINE-NEXT: xorb %r8b, %bl ; CHECK-BASELINE-NEXT: andb 31(%r10), %bl ; CHECK-BASELINE-NEXT: xorb %r8b, %bl @@ -1402,47 +1402,47 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-NEXT: movb %r15b, 23(%r11) ; CHECK-BASELINE-NEXT: movb %r12b, 22(%r11) ; CHECK-BASELINE-NEXT: movb %r13b, 21(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 20(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 19(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 18(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 17(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 16(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 15(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 14(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 13(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 12(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 11(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 10(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 9(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 8(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 7(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 6(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 5(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 4(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 3(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 2(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 1(%r11) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, (%r11) ; CHECK-BASELINE-NEXT: movq %r11, %rax ; CHECK-BASELINE-NEXT: popq %rbx @@ -1465,196 +1465,196 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-SSE1-NEXT: movq %rdx, %r8 ; CHECK-SSE1-NEXT: movq %rsi, %r9 ; CHECK-SSE1-NEXT: movq %rdi, %r11 -; CHECK-SSE1-NEXT: movzbl 15(%rdx), %eax +; CHECK-SSE1-NEXT: movb 15(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax +; CHECK-SSE1-NEXT: movb 14(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax +; CHECK-SSE1-NEXT: movb 13(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 12(%rdx), %eax +; CHECK-SSE1-NEXT: movb 12(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax +; CHECK-SSE1-NEXT: movb 11(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax +; CHECK-SSE1-NEXT: movb 10(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 9(%rdx), %ebp -; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r14d -; CHECK-SSE1-NEXT: movzbl 7(%rdx), %r15d -; CHECK-SSE1-NEXT: movzbl 6(%rdx), %r12d -; CHECK-SSE1-NEXT: movzbl 5(%rdx), %esi -; CHECK-SSE1-NEXT: movzbl 4(%rdx), %r13d -; CHECK-SSE1-NEXT: movzbl 3(%rdx), %edx -; CHECK-SSE1-NEXT: movzbl 2(%r8), %edi -; CHECK-SSE1-NEXT: movzbl (%r8), %eax -; CHECK-SSE1-NEXT: movzbl 1(%r8), %ecx -; CHECK-SSE1-NEXT: movzbl (%r9), %ebx +; CHECK-SSE1-NEXT: movb 9(%rdx), %bpl +; CHECK-SSE1-NEXT: movb 8(%rdx), %r14b +; CHECK-SSE1-NEXT: movb 7(%rdx), %r15b +; CHECK-SSE1-NEXT: movb 6(%rdx), %r12b +; CHECK-SSE1-NEXT: movb 5(%rdx), %sil +; CHECK-SSE1-NEXT: movb 4(%rdx), %r13b +; CHECK-SSE1-NEXT: movb 3(%rdx), %dl +; CHECK-SSE1-NEXT: movb 2(%r8), %dil +; CHECK-SSE1-NEXT: movb (%r8), %al +; CHECK-SSE1-NEXT: movb 1(%r8), %cl +; CHECK-SSE1-NEXT: movb (%r9), %bl ; CHECK-SSE1-NEXT: xorb %al, %bl ; CHECK-SSE1-NEXT: andb (%r10), %bl ; CHECK-SSE1-NEXT: xorb %al, %bl ; CHECK-SSE1-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 1(%r9), %eax +; CHECK-SSE1-NEXT: movb 1(%r9), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 1(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 2(%r9), %eax +; CHECK-SSE1-NEXT: movb 2(%r9), %al ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: andb 2(%r10), %al ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 3(%r9), %eax +; CHECK-SSE1-NEXT: movb 3(%r9), %al ; CHECK-SSE1-NEXT: xorb %dl, %al ; CHECK-SSE1-NEXT: andb 3(%r10), %al ; CHECK-SSE1-NEXT: xorb %dl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 4(%r9), %eax +; CHECK-SSE1-NEXT: movb 4(%r9), %al ; CHECK-SSE1-NEXT: xorb %r13b, %al ; CHECK-SSE1-NEXT: andb 4(%r10), %al ; CHECK-SSE1-NEXT: xorb %r13b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 5(%r9), %eax +; CHECK-SSE1-NEXT: movb 5(%r9), %al ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: andb 5(%r10), %al ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 6(%r9), %eax +; CHECK-SSE1-NEXT: movb 6(%r9), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: andb 6(%r10), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 7(%r9), %eax +; CHECK-SSE1-NEXT: movb 7(%r9), %al ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: andb 7(%r10), %al ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 8(%r9), %eax +; CHECK-SSE1-NEXT: movb 8(%r9), %al ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: andb 8(%r10), %al ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 9(%r9), %eax +; CHECK-SSE1-NEXT: movb 9(%r9), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: andb 9(%r10), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 10(%r9), %eax -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 10(%r9), %al +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 10(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 11(%r9), %eax -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 11(%r9), %al +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 11(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 12(%r9), %eax -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 12(%r9), %al +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 12(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 13(%r9), %eax -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 13(%r9), %al +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 13(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 14(%r9), %eax -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 14(%r9), %al +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 14(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 15(%r9), %eax -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 15(%r9), %al +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %cl # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 15(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 16(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 16(%r9), %ecx +; CHECK-SSE1-NEXT: movb 16(%r8), %al +; CHECK-SSE1-NEXT: movb 16(%r9), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 16(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 17(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 17(%r9), %ecx +; CHECK-SSE1-NEXT: movb 17(%r8), %al +; CHECK-SSE1-NEXT: movb 17(%r9), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 17(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 18(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 18(%r9), %ecx +; CHECK-SSE1-NEXT: movb 18(%r8), %al +; CHECK-SSE1-NEXT: movb 18(%r9), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 18(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 19(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 19(%r9), %ecx +; CHECK-SSE1-NEXT: movb 19(%r8), %al +; CHECK-SSE1-NEXT: movb 19(%r9), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 19(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 20(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 20(%r9), %ecx +; CHECK-SSE1-NEXT: movb 20(%r8), %al +; CHECK-SSE1-NEXT: movb 20(%r9), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 20(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 21(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 21(%r9), %r13d +; CHECK-SSE1-NEXT: movb 21(%r8), %al +; CHECK-SSE1-NEXT: movb 21(%r9), %r13b ; CHECK-SSE1-NEXT: xorb %al, %r13b ; CHECK-SSE1-NEXT: andb 21(%r10), %r13b ; CHECK-SSE1-NEXT: xorb %al, %r13b -; CHECK-SSE1-NEXT: movzbl 22(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 22(%r9), %r12d +; CHECK-SSE1-NEXT: movb 22(%r8), %al +; CHECK-SSE1-NEXT: movb 22(%r9), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: andb 22(%r10), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b -; CHECK-SSE1-NEXT: movzbl 23(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 23(%r9), %r15d +; CHECK-SSE1-NEXT: movb 23(%r8), %al +; CHECK-SSE1-NEXT: movb 23(%r9), %r15b ; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: andb 23(%r10), %r15b ; CHECK-SSE1-NEXT: xorb %al, %r15b -; CHECK-SSE1-NEXT: movzbl 24(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 24(%r9), %r14d +; CHECK-SSE1-NEXT: movb 24(%r8), %al +; CHECK-SSE1-NEXT: movb 24(%r9), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: andb 24(%r10), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b -; CHECK-SSE1-NEXT: movzbl 25(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 25(%r9), %ebp +; CHECK-SSE1-NEXT: movb 25(%r8), %al +; CHECK-SSE1-NEXT: movb 25(%r9), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: andb 25(%r10), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl -; CHECK-SSE1-NEXT: movzbl 26(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 26(%r9), %edi +; CHECK-SSE1-NEXT: movb 26(%r8), %al +; CHECK-SSE1-NEXT: movb 26(%r9), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: andb 26(%r10), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil -; CHECK-SSE1-NEXT: movzbl 27(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 27(%r9), %esi +; CHECK-SSE1-NEXT: movb 27(%r8), %al +; CHECK-SSE1-NEXT: movb 27(%r9), %sil ; CHECK-SSE1-NEXT: xorb %al, %sil ; CHECK-SSE1-NEXT: andb 27(%r10), %sil ; CHECK-SSE1-NEXT: xorb %al, %sil -; CHECK-SSE1-NEXT: movzbl 28(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 28(%r9), %edx +; CHECK-SSE1-NEXT: movb 28(%r8), %al +; CHECK-SSE1-NEXT: movb 28(%r9), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 28(%r10), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: movzbl 29(%r8), %eax -; CHECK-SSE1-NEXT: movzbl 29(%r9), %ecx +; CHECK-SSE1-NEXT: movb 29(%r8), %al +; CHECK-SSE1-NEXT: movb 29(%r9), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 29(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: movzbl 30(%r8), %ebx -; CHECK-SSE1-NEXT: movzbl 30(%r9), %eax +; CHECK-SSE1-NEXT: movb 30(%r8), %bl +; CHECK-SSE1-NEXT: movb 30(%r9), %al ; CHECK-SSE1-NEXT: xorb %bl, %al ; CHECK-SSE1-NEXT: andb 30(%r10), %al ; CHECK-SSE1-NEXT: xorb %bl, %al -; CHECK-SSE1-NEXT: movzbl 31(%r8), %r8d -; CHECK-SSE1-NEXT: movzbl 31(%r9), %ebx +; CHECK-SSE1-NEXT: movb 31(%r8), %r8b +; CHECK-SSE1-NEXT: movb 31(%r9), %bl ; CHECK-SSE1-NEXT: xorb %r8b, %bl ; CHECK-SSE1-NEXT: andb 31(%r10), %bl ; CHECK-SSE1-NEXT: xorb %r8b, %bl @@ -1669,47 +1669,47 @@ define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-SSE1-NEXT: movb %r15b, 23(%r11) ; CHECK-SSE1-NEXT: movb %r12b, 22(%r11) ; CHECK-SSE1-NEXT: movb %r13b, 21(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 20(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 19(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 18(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 17(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 16(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 15(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 14(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 13(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 12(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 11(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 10(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 9(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 8(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 7(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 6(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 5(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 4(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 3(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 2(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 1(%r11) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, (%r11) ; CHECK-SSE1-NEXT: movq %r11, %rax ; CHECK-SSE1-NEXT: popq %rbx @@ -2367,9 +2367,9 @@ define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v4i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorl %r9d, %esi ; CHECK-BASELINE-NEXT: xorb %r11b, %dl ; CHECK-BASELINE-NEXT: xorb %r10b, %cl @@ -2391,9 +2391,9 @@ define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-SSE1-LABEL: in_v4i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorl %r9d, %esi ; CHECK-SSE1-NEXT: xorb %r11b, %dl ; CHECK-SSE1-NEXT: xorb %r10b, %cl @@ -2500,22 +2500,22 @@ define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorb %r11b, %sil ; CHECK-BASELINE-NEXT: xorb %r12b, %dl ; CHECK-BASELINE-NEXT: xorb %r15b, %cl ; CHECK-BASELINE-NEXT: xorb %r14b, %r8b ; CHECK-BASELINE-NEXT: xorb %bpl, %r9b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r13b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b @@ -2558,22 +2558,22 @@ define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorb %r11b, %sil ; CHECK-SSE1-NEXT: xorb %r12b, %dl ; CHECK-SSE1-NEXT: xorb %r15b, %cl ; CHECK-SSE1-NEXT: xorb %r14b, %r8b ; CHECK-SSE1-NEXT: xorb %bpl, %r9b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r13b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b @@ -2764,62 +2764,62 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movq %rdi, %rdx -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %esi -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %sil +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-BASELINE-NEXT: xorb %dil, %r9b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-BASELINE-NEXT: xorb %dil, %r9b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-BASELINE-NEXT: xorb %r10b, %dil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil ; CHECK-BASELINE-NEXT: xorb %r10b, %dil -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: xorb %r11b, %r10b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b ; CHECK-BASELINE-NEXT: xorb %r11b, %r10b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorb %bl, %r11b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-BASELINE-NEXT: xorb %bl, %r11b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: xorb %r12b, %r13b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: xorb %r12b, %r13b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-BASELINE-NEXT: xorb %r15b, %r12b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b ; CHECK-BASELINE-NEXT: xorb %r15b, %r12b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-BASELINE-NEXT: xorb %r14b, %r15b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b ; CHECK-BASELINE-NEXT: xorb %r14b, %r15b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %r14b ; CHECK-BASELINE-NEXT: xorb %bpl, %r14b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b ; CHECK-BASELINE-NEXT: xorb %bpl, %r14b -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %sil, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %sil, %cl @@ -2835,24 +2835,24 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind ; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdx) ; CHECK-BASELINE-NEXT: movb %dil, 5(%rdx) ; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdx) -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: xorb %al, %r8b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: xorb %al, %r8b ; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdx) -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, 2(%rdx) -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, 1(%rdx) -; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-BASELINE-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl @@ -2879,62 +2879,62 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movq %rdi, %rdx -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %sil +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-SSE1-NEXT: xorb %dil, %r9b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-SSE1-NEXT: xorb %dil, %r9b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %dil ; CHECK-SSE1-NEXT: xorb %r10b, %dil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil ; CHECK-SSE1-NEXT: xorb %r10b, %dil -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: xorb %r11b, %r10b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b ; CHECK-SSE1-NEXT: xorb %r11b, %r10b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorb %bl, %r11b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b ; CHECK-SSE1-NEXT: xorb %bl, %r11b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: xorb %r12b, %r13b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: xorb %r12b, %r13b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r12b ; CHECK-SSE1-NEXT: xorb %r15b, %r12b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b ; CHECK-SSE1-NEXT: xorb %r15b, %r12b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r15b ; CHECK-SSE1-NEXT: xorb %r14b, %r15b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b ; CHECK-SSE1-NEXT: xorb %r14b, %r15b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %r14b ; CHECK-SSE1-NEXT: xorb %bpl, %r14b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b ; CHECK-SSE1-NEXT: xorb %bpl, %r14b -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %cl, %al -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %sil, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %sil, %cl @@ -2950,24 +2950,24 @@ define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind ; CHECK-SSE1-NEXT: movb %r10b, 6(%rdx) ; CHECK-SSE1-NEXT: movb %dil, 5(%rdx) ; CHECK-SSE1-NEXT: movb %r9b, 4(%rdx) -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: xorb %al, %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: xorb %al, %r8b ; CHECK-SSE1-NEXT: movb %r8b, 3(%rdx) -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, 2(%rdx) -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, 1(%rdx) -; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax +; CHECK-SSE1-NEXT: movb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl @@ -3246,194 +3246,194 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-NEXT: movq %rdx, %r13 ; CHECK-BASELINE-NEXT: movq %rsi, %rbx ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %r12d -; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 15(%rdx), %r12b +; CHECK-BASELINE-NEXT: movb 14(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 13(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 12(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 11(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax +; CHECK-BASELINE-NEXT: movb 10(%rdx), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %r9d -; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r10d -; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r11d -; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %r8d -; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %ebp -; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %esi -; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %edi -; CHECK-BASELINE-NEXT: movzbl 2(%rdx), %r14d -; CHECK-BASELINE-NEXT: movzbl (%rdx), %eax -; CHECK-BASELINE-NEXT: movzbl 1(%rdx), %r15d -; CHECK-BASELINE-NEXT: movzbl (%rbx), %edx +; CHECK-BASELINE-NEXT: movb 9(%rdx), %r9b +; CHECK-BASELINE-NEXT: movb 8(%rdx), %r10b +; CHECK-BASELINE-NEXT: movb 7(%rdx), %r11b +; CHECK-BASELINE-NEXT: movb 6(%rdx), %r8b +; CHECK-BASELINE-NEXT: movb 5(%rdx), %bpl +; CHECK-BASELINE-NEXT: movb 4(%rdx), %sil +; CHECK-BASELINE-NEXT: movb 3(%rdx), %dil +; CHECK-BASELINE-NEXT: movb 2(%rdx), %r14b +; CHECK-BASELINE-NEXT: movb (%rdx), %al +; CHECK-BASELINE-NEXT: movb 1(%rdx), %r15b +; CHECK-BASELINE-NEXT: movb (%rbx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb (%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 1(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 1(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: andb 1(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 2(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 2(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: andb 2(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 3(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 3(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: andb 3(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 4(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 4(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: andb 4(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 5(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 5(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: andb 5(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 6(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 6(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r8b, %al ; CHECK-BASELINE-NEXT: andb 6(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r8b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 7(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 7(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: andb 7(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 8(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 8(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: andb 8(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 9(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 9(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: andb 9(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 10(%rbx), %edx -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 10(%rbx), %dl +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 10(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 11(%rbx), %edx -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 11(%rbx), %dl +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 11(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 12(%rbx), %edx -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 12(%rbx), %dl +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 12(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 13(%rbx), %edx -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 13(%rbx), %dl +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 13(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 14(%rbx), %edx -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb 14(%rbx), %dl +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 15(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 15(%rbx), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: andb 15(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 16(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 16(%rbx), %edx +; CHECK-BASELINE-NEXT: movb 16(%r13), %al +; CHECK-BASELINE-NEXT: movb 16(%rbx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 16(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 17(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 17(%rbx), %edx +; CHECK-BASELINE-NEXT: movb 17(%r13), %al +; CHECK-BASELINE-NEXT: movb 17(%rbx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 17(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 18(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 18(%rbx), %edx +; CHECK-BASELINE-NEXT: movb 18(%r13), %al +; CHECK-BASELINE-NEXT: movb 18(%rbx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 18(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 19(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 19(%rbx), %r12d +; CHECK-BASELINE-NEXT: movb 19(%r13), %al +; CHECK-BASELINE-NEXT: movb 19(%rbx), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b -; CHECK-BASELINE-NEXT: movzbl 20(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 20(%rbx), %r15d +; CHECK-BASELINE-NEXT: movb 20(%r13), %al +; CHECK-BASELINE-NEXT: movb 20(%rbx), %r15b ; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: andb 20(%rcx), %r15b ; CHECK-BASELINE-NEXT: movq %rcx, %rsi ; CHECK-BASELINE-NEXT: xorb %al, %r15b -; CHECK-BASELINE-NEXT: movzbl 21(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 21(%rbx), %r14d +; CHECK-BASELINE-NEXT: movb 21(%r13), %al +; CHECK-BASELINE-NEXT: movb 21(%rbx), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: andb 21(%rcx), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b -; CHECK-BASELINE-NEXT: movzbl 22(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 22(%rbx), %ebp +; CHECK-BASELINE-NEXT: movb 22(%r13), %al +; CHECK-BASELINE-NEXT: movb 22(%rbx), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: andb 22(%rcx), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl -; CHECK-BASELINE-NEXT: movzbl 23(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 23(%rbx), %r11d +; CHECK-BASELINE-NEXT: movb 23(%r13), %al +; CHECK-BASELINE-NEXT: movb 23(%rbx), %r11b ; CHECK-BASELINE-NEXT: xorb %al, %r11b ; CHECK-BASELINE-NEXT: andb 23(%rcx), %r11b ; CHECK-BASELINE-NEXT: xorb %al, %r11b -; CHECK-BASELINE-NEXT: movzbl 24(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 24(%rbx), %r10d +; CHECK-BASELINE-NEXT: movb 24(%r13), %al +; CHECK-BASELINE-NEXT: movb 24(%rbx), %r10b ; CHECK-BASELINE-NEXT: xorb %al, %r10b ; CHECK-BASELINE-NEXT: andb 24(%rcx), %r10b ; CHECK-BASELINE-NEXT: xorb %al, %r10b -; CHECK-BASELINE-NEXT: movzbl 25(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 25(%rbx), %r9d +; CHECK-BASELINE-NEXT: movb 25(%r13), %al +; CHECK-BASELINE-NEXT: movb 25(%rbx), %r9b ; CHECK-BASELINE-NEXT: xorb %al, %r9b ; CHECK-BASELINE-NEXT: andb 25(%rcx), %r9b ; CHECK-BASELINE-NEXT: xorb %al, %r9b -; CHECK-BASELINE-NEXT: movzbl 26(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 26(%rbx), %r8d +; CHECK-BASELINE-NEXT: movb 26(%r13), %al +; CHECK-BASELINE-NEXT: movb 26(%rbx), %r8b ; CHECK-BASELINE-NEXT: xorb %al, %r8b ; CHECK-BASELINE-NEXT: andb 26(%rcx), %r8b ; CHECK-BASELINE-NEXT: xorb %al, %r8b -; CHECK-BASELINE-NEXT: movzbl 27(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 27(%rbx), %edi +; CHECK-BASELINE-NEXT: movb 27(%r13), %al +; CHECK-BASELINE-NEXT: movb 27(%rbx), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil -; CHECK-BASELINE-NEXT: movzbl 28(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 28(%rbx), %edx +; CHECK-BASELINE-NEXT: movb 28(%r13), %al +; CHECK-BASELINE-NEXT: movb 28(%rbx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 28(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl -; CHECK-BASELINE-NEXT: movzbl 29(%r13), %eax -; CHECK-BASELINE-NEXT: movzbl 29(%rbx), %ecx +; CHECK-BASELINE-NEXT: movb 29(%r13), %al +; CHECK-BASELINE-NEXT: movb 29(%rbx), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 29(%rsi), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl -; CHECK-BASELINE-NEXT: movzbl 30(%r13), %eax +; CHECK-BASELINE-NEXT: movb 30(%r13), %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-BASELINE-NEXT: movzbl 30(%rbx), %eax +; CHECK-BASELINE-NEXT: movb 30(%rbx), %al ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: andb 30(%rsi), %al ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload -; CHECK-BASELINE-NEXT: movzbl 31(%r13), %r13d -; CHECK-BASELINE-NEXT: movzbl 31(%rbx), %ebx +; CHECK-BASELINE-NEXT: movb 31(%r13), %r13b +; CHECK-BASELINE-NEXT: movb 31(%rbx), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: andb 31(%rsi), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl @@ -3451,43 +3451,43 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-NEXT: movb %r14b, 21(%r13) ; CHECK-BASELINE-NEXT: movb %r15b, 20(%r13) ; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 18(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 17(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 16(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 15(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 14(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 13(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 12(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 11(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 10(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 9(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 8(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 7(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 6(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 5(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 4(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 3(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 2(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, 1(%r13) -; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-BASELINE-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-BASELINE-NEXT: movb %al, (%r13) ; CHECK-BASELINE-NEXT: movq %r13, %rax ; CHECK-BASELINE-NEXT: popq %rbx @@ -3509,194 +3509,194 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-SSE1-NEXT: movq %rdx, %r13 ; CHECK-SSE1-NEXT: movq %rsi, %rbx ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-SSE1-NEXT: movzbl 15(%rdx), %r12d -; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax +; CHECK-SSE1-NEXT: movb 15(%rdx), %r12b +; CHECK-SSE1-NEXT: movb 14(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax +; CHECK-SSE1-NEXT: movb 13(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 12(%rdx), %eax +; CHECK-SSE1-NEXT: movb 12(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax +; CHECK-SSE1-NEXT: movb 11(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax +; CHECK-SSE1-NEXT: movb 10(%rdx), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 9(%rdx), %r9d -; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r10d -; CHECK-SSE1-NEXT: movzbl 7(%rdx), %r11d -; CHECK-SSE1-NEXT: movzbl 6(%rdx), %r8d -; CHECK-SSE1-NEXT: movzbl 5(%rdx), %ebp -; CHECK-SSE1-NEXT: movzbl 4(%rdx), %esi -; CHECK-SSE1-NEXT: movzbl 3(%rdx), %edi -; CHECK-SSE1-NEXT: movzbl 2(%rdx), %r14d -; CHECK-SSE1-NEXT: movzbl (%rdx), %eax -; CHECK-SSE1-NEXT: movzbl 1(%rdx), %r15d -; CHECK-SSE1-NEXT: movzbl (%rbx), %edx +; CHECK-SSE1-NEXT: movb 9(%rdx), %r9b +; CHECK-SSE1-NEXT: movb 8(%rdx), %r10b +; CHECK-SSE1-NEXT: movb 7(%rdx), %r11b +; CHECK-SSE1-NEXT: movb 6(%rdx), %r8b +; CHECK-SSE1-NEXT: movb 5(%rdx), %bpl +; CHECK-SSE1-NEXT: movb 4(%rdx), %sil +; CHECK-SSE1-NEXT: movb 3(%rdx), %dil +; CHECK-SSE1-NEXT: movb 2(%rdx), %r14b +; CHECK-SSE1-NEXT: movb (%rdx), %al +; CHECK-SSE1-NEXT: movb 1(%rdx), %r15b +; CHECK-SSE1-NEXT: movb (%rbx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb (%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 1(%rbx), %eax +; CHECK-SSE1-NEXT: movb 1(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: andb 1(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 2(%rbx), %eax +; CHECK-SSE1-NEXT: movb 2(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: andb 2(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 3(%rbx), %eax +; CHECK-SSE1-NEXT: movb 3(%rbx), %al ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: andb 3(%rcx), %al ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 4(%rbx), %eax +; CHECK-SSE1-NEXT: movb 4(%rbx), %al ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: andb 4(%rcx), %al ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 5(%rbx), %eax +; CHECK-SSE1-NEXT: movb 5(%rbx), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: andb 5(%rcx), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 6(%rbx), %eax +; CHECK-SSE1-NEXT: movb 6(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r8b, %al ; CHECK-SSE1-NEXT: andb 6(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r8b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 7(%rbx), %eax +; CHECK-SSE1-NEXT: movb 7(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: andb 7(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 8(%rbx), %eax +; CHECK-SSE1-NEXT: movb 8(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: andb 8(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 9(%rbx), %eax +; CHECK-SSE1-NEXT: movb 9(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: andb 9(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 10(%rbx), %edx -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 10(%rbx), %dl +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 10(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 11(%rbx), %edx -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 11(%rbx), %dl +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 11(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 12(%rbx), %edx -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 12(%rbx), %dl +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 12(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 13(%rbx), %edx -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 13(%rbx), %dl +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 13(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 14(%rbx), %edx -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb 14(%rbx), %dl +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 14(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 15(%rbx), %eax +; CHECK-SSE1-NEXT: movb 15(%rbx), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: andb 15(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 16(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 16(%rbx), %edx +; CHECK-SSE1-NEXT: movb 16(%r13), %al +; CHECK-SSE1-NEXT: movb 16(%rbx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 16(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 17(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 17(%rbx), %edx +; CHECK-SSE1-NEXT: movb 17(%r13), %al +; CHECK-SSE1-NEXT: movb 17(%rbx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 17(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 18(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 18(%rbx), %edx +; CHECK-SSE1-NEXT: movb 18(%r13), %al +; CHECK-SSE1-NEXT: movb 18(%rbx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 18(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 19(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 19(%rbx), %r12d +; CHECK-SSE1-NEXT: movb 19(%r13), %al +; CHECK-SSE1-NEXT: movb 19(%rbx), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b -; CHECK-SSE1-NEXT: movzbl 20(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 20(%rbx), %r15d +; CHECK-SSE1-NEXT: movb 20(%r13), %al +; CHECK-SSE1-NEXT: movb 20(%rbx), %r15b ; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: andb 20(%rcx), %r15b ; CHECK-SSE1-NEXT: movq %rcx, %rsi ; CHECK-SSE1-NEXT: xorb %al, %r15b -; CHECK-SSE1-NEXT: movzbl 21(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 21(%rbx), %r14d +; CHECK-SSE1-NEXT: movb 21(%r13), %al +; CHECK-SSE1-NEXT: movb 21(%rbx), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: andb 21(%rcx), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b -; CHECK-SSE1-NEXT: movzbl 22(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 22(%rbx), %ebp +; CHECK-SSE1-NEXT: movb 22(%r13), %al +; CHECK-SSE1-NEXT: movb 22(%rbx), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: andb 22(%rcx), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl -; CHECK-SSE1-NEXT: movzbl 23(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 23(%rbx), %r11d +; CHECK-SSE1-NEXT: movb 23(%r13), %al +; CHECK-SSE1-NEXT: movb 23(%rbx), %r11b ; CHECK-SSE1-NEXT: xorb %al, %r11b ; CHECK-SSE1-NEXT: andb 23(%rcx), %r11b ; CHECK-SSE1-NEXT: xorb %al, %r11b -; CHECK-SSE1-NEXT: movzbl 24(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 24(%rbx), %r10d +; CHECK-SSE1-NEXT: movb 24(%r13), %al +; CHECK-SSE1-NEXT: movb 24(%rbx), %r10b ; CHECK-SSE1-NEXT: xorb %al, %r10b ; CHECK-SSE1-NEXT: andb 24(%rcx), %r10b ; CHECK-SSE1-NEXT: xorb %al, %r10b -; CHECK-SSE1-NEXT: movzbl 25(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 25(%rbx), %r9d +; CHECK-SSE1-NEXT: movb 25(%r13), %al +; CHECK-SSE1-NEXT: movb 25(%rbx), %r9b ; CHECK-SSE1-NEXT: xorb %al, %r9b ; CHECK-SSE1-NEXT: andb 25(%rcx), %r9b ; CHECK-SSE1-NEXT: xorb %al, %r9b -; CHECK-SSE1-NEXT: movzbl 26(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 26(%rbx), %r8d +; CHECK-SSE1-NEXT: movb 26(%r13), %al +; CHECK-SSE1-NEXT: movb 26(%rbx), %r8b ; CHECK-SSE1-NEXT: xorb %al, %r8b ; CHECK-SSE1-NEXT: andb 26(%rcx), %r8b ; CHECK-SSE1-NEXT: xorb %al, %r8b -; CHECK-SSE1-NEXT: movzbl 27(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 27(%rbx), %edi +; CHECK-SSE1-NEXT: movb 27(%r13), %al +; CHECK-SSE1-NEXT: movb 27(%rbx), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: andb 27(%rcx), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil -; CHECK-SSE1-NEXT: movzbl 28(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 28(%rbx), %edx +; CHECK-SSE1-NEXT: movb 28(%r13), %al +; CHECK-SSE1-NEXT: movb 28(%rbx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 28(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl -; CHECK-SSE1-NEXT: movzbl 29(%r13), %eax -; CHECK-SSE1-NEXT: movzbl 29(%rbx), %ecx +; CHECK-SSE1-NEXT: movb 29(%r13), %al +; CHECK-SSE1-NEXT: movb 29(%rbx), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 29(%rsi), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl -; CHECK-SSE1-NEXT: movzbl 30(%r13), %eax +; CHECK-SSE1-NEXT: movb 30(%r13), %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-SSE1-NEXT: movzbl 30(%rbx), %eax +; CHECK-SSE1-NEXT: movb 30(%rbx), %al ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-SSE1-NEXT: andb 30(%rsi), %al ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload -; CHECK-SSE1-NEXT: movzbl 31(%r13), %r13d -; CHECK-SSE1-NEXT: movzbl 31(%rbx), %ebx +; CHECK-SSE1-NEXT: movb 31(%r13), %r13b +; CHECK-SSE1-NEXT: movb 31(%rbx), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: andb 31(%rsi), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl @@ -3714,43 +3714,43 @@ define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-SSE1-NEXT: movb %r14b, 21(%r13) ; CHECK-SSE1-NEXT: movb %r15b, 20(%r13) ; CHECK-SSE1-NEXT: movb %r12b, 19(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 18(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 17(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 16(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 15(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 14(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 13(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 12(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 11(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 10(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 9(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 8(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 7(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 6(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 5(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 4(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 3(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 2(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, 1(%r13) -; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload +; CHECK-SSE1-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload ; CHECK-SSE1-NEXT: movb %al, (%r13) ; CHECK-SSE1-NEXT: movq %r13, %rax ; CHECK-SSE1-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/urem-power-of-two.ll b/llvm/test/CodeGen/X86/urem-power-of-two.ll index 16dddfa7e819d2..60ae891a4820ad 100644 --- a/llvm/test/CodeGen/X86/urem-power-of-two.ll +++ b/llvm/test/CodeGen/X86/urem-power-of-two.ll @@ -26,7 +26,7 @@ define i64 @const_pow_2(i64 %x) { define i25 @shift_left_pow_2(i25 %x, i25 %y) { ; X86-LABEL: shift_left_pow_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $1, %eax ; X86-NEXT: shll %cl, %eax ; X86-NEXT: addl $33554431, %eax # imm = 0x1FFFFFF @@ -52,7 +52,7 @@ define i25 @shift_left_pow_2(i25 %x, i25 %y) { define i16 @shift_right_pow_2(i16 %x, i16 %y) { ; X86-LABEL: shift_right_pow_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $32768, %eax # imm = 0x8000 ; X86-NEXT: shrl %cl, %eax ; X86-NEXT: decl %eax @@ -80,7 +80,7 @@ define i16 @shift_right_pow_2(i16 %x, i16 %y) { define i8 @and_pow_2(i8 %x, i8 %y) { ; X86-LABEL: and_pow_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $4, %cl ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: divb %cl diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll index fd496780f535df..2a38afd7a782ba 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll @@ -158,9 +158,9 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; SSE2-NEXT: andps %xmm1, %xmm3 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 ; SSE2-NEXT: movdqa %xmm3, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_urem_vec: diff --git a/llvm/test/CodeGen/X86/ushl_sat.ll b/llvm/test/CodeGen/X86/ushl_sat.ll index a4f0656c13aff0..a3c4dd6e46ad94 100644 --- a/llvm/test/CodeGen/X86/ushl_sat.ll +++ b/llvm/test/CodeGen/X86/ushl_sat.ll @@ -30,7 +30,7 @@ define i16 @func(i16 %x, i16 %y) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl %eax, %edx ; X86-NEXT: shll %cl, %edx ; X86-NEXT: movzwl %dx, %edx @@ -69,7 +69,7 @@ define i16 @func2(i8 %x, i8 %y) nounwind { ; X86-LABEL: func2: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl %eax, %eax ; X86-NEXT: movl %eax, %edx @@ -162,9 +162,9 @@ define i4 @func4(i4 %x, i4 %y) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: andb $15, %cl -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: movl %eax, %edx ; X86-NEXT: shlb %cl, %dl @@ -201,7 +201,7 @@ define i64 @func5(i64 %x, i64 %y) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl %esi, %edi @@ -255,7 +255,7 @@ define i18 @func6(i16 %x, i16 %y) nounwind { ; X86-LABEL: func6: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shll $14, %eax ; X86-NEXT: movl %eax, %edx @@ -291,7 +291,7 @@ define i32 @func7(i32 %x, i32 %y) nounwind { ; X86-LABEL: func7: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, %edx ; X86-NEXT: shll %cl, %edx @@ -325,8 +325,8 @@ define i8 @func8(i8 %x, i8 %y) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %edx ; X86-NEXT: shlb %cl, %dl ; X86-NEXT: movzbl %dl, %esi diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll index 871e9059c20cd0..5904892e7f2406 100644 --- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll +++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll @@ -67,7 +67,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movb {{[0-9]+}}(%esp), %ch -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movb {{[0-9]+}}(%esp), %dl ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx @@ -94,7 +94,7 @@ define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-NEXT: cmpl %edi, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: cmovnel %ebx, %edx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: shll %cl, %ebp ; X86-NEXT: movl %ebp, %edi ; X86-NEXT: shrl %cl, %edi diff --git a/llvm/test/CodeGen/X86/usub_sat.ll b/llvm/test/CodeGen/X86/usub_sat.ll index 6749a1f9147aff..8ac20843259a2d 100644 --- a/llvm/test/CodeGen/X86/usub_sat.ll +++ b/llvm/test/CodeGen/X86/usub_sat.ll @@ -74,7 +74,7 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y) nounwind { define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %eax @@ -97,7 +97,7 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y) nounwind { define zeroext i4 @func3(i4 zeroext %x, i4 zeroext %y) nounwind { ; X86-LABEL: func3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/usub_sat_plus.ll b/llvm/test/CodeGen/X86/usub_sat_plus.ll index 0fb14ad5cf7b02..e58a098265ec5f 100644 --- a/llvm/test/CodeGen/X86/usub_sat_plus.ll +++ b/llvm/test/CodeGen/X86/usub_sat_plus.ll @@ -82,8 +82,8 @@ define zeroext i16 @func16(i16 zeroext %x, i16 zeroext %y, i16 zeroext %z) nounw define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { ; X86-LABEL: func8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: subb %al, %cl @@ -111,8 +111,8 @@ define zeroext i8 @func8(i8 zeroext %x, i8 zeroext %y, i8 zeroext %z) nounwind { define zeroext i4 @func4(i4 zeroext %x, i4 zeroext %y, i4 zeroext %z) nounwind { ; X86-LABEL: func4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: mulb {{[0-9]+}}(%esp) ; X86-NEXT: andb $15, %al ; X86-NEXT: xorl %edx, %edx diff --git a/llvm/test/CodeGen/X86/usub_sat_vec.ll b/llvm/test/CodeGen/X86/usub_sat_vec.ll index 6a49f74f1ddbd0..383d3ad24ad7c6 100644 --- a/llvm/test/CodeGen/X86/usub_sat_vec.ll +++ b/llvm/test/CodeGen/X86/usub_sat_vec.ll @@ -429,7 +429,7 @@ define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; SSE-LABEL: v1i8: ; SSE: # %bb.0: -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: xorl %ecx, %ecx ; SSE-NEXT: subb (%rsi), %al ; SSE-NEXT: movzbl %al, %eax @@ -439,7 +439,7 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; ; AVX-LABEL: v1i8: ; AVX: # %bb.0: -; AVX-NEXT: movzbl (%rdi), %eax +; AVX-NEXT: movb (%rdi), %al ; AVX-NEXT: xorl %ecx, %ecx ; AVX-NEXT: subb (%rsi), %al ; AVX-NEXT: movzbl %al, %eax diff --git a/llvm/test/CodeGen/X86/vec_setcc.ll b/llvm/test/CodeGen/X86/vec_setcc.ll index 09d655ae5dce99..9f5815e1f78699 100644 --- a/llvm/test/CodeGen/X86/vec_setcc.ll +++ b/llvm/test/CodeGen/X86/vec_setcc.ll @@ -208,9 +208,9 @@ define <3 x i1> @test_setcc_v3i1_v3i16(ptr %a) nounwind { ; SSE2-NEXT: pcmpeqw %xmm0, %xmm1 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] ; SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %edx -; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %al +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %dl +; SSE2-NEXT: movb -{{[0-9]+}}(%rsp), %cl ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_setcc_v3i1_v3i16: diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 61980ce083c2bd..c53641d4224590 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ b/llvm/test/CodeGen/X86/vector-sext.ll @@ -1342,7 +1342,7 @@ entry: define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; SSE-LABEL: load_sext_2i1_to_2i64: ; SSE: # %bb.0: # %entry -; SSE-NEXT: movzbl (%rdi), %eax +; SSE-NEXT: movb (%rdi), %al ; SSE-NEXT: movzbl %al, %ecx ; SSE-NEXT: shrb %al ; SSE-NEXT: movzbl %al, %eax @@ -1356,7 +1356,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; ; AVX1-LABEL: load_sext_2i1_to_2i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: movzbl (%rdi), %eax +; AVX1-NEXT: movb (%rdi), %al ; AVX1-NEXT: movzbl %al, %ecx ; AVX1-NEXT: shrb %al ; AVX1-NEXT: movzbl %al, %eax @@ -1370,7 +1370,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; ; AVX2-LABEL: load_sext_2i1_to_2i64: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movzbl (%rdi), %eax +; AVX2-NEXT: movb (%rdi), %al ; AVX2-NEXT: movzbl %al, %ecx ; AVX2-NEXT: shrb %al ; AVX2-NEXT: movzbl %al, %eax @@ -1403,7 +1403,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; X86-SSE2-LABEL: load_sext_2i1_to_2i64: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl (%eax), %eax +; X86-SSE2-NEXT: movb (%eax), %al ; X86-SSE2-NEXT: movzbl %al, %ecx ; X86-SSE2-NEXT: shrb %al ; X86-SSE2-NEXT: movzbl %al, %eax @@ -1420,7 +1420,7 @@ define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { ; X86-SSE41-LABEL: load_sext_2i1_to_2i64: ; X86-SSE41: # %bb.0: # %entry ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movzbl (%eax), %eax +; X86-SSE41-NEXT: movb (%eax), %al ; X86-SSE41-NEXT: movzbl %al, %ecx ; X86-SSE41-NEXT: andl $1, %ecx ; X86-SSE41-NEXT: negl %ecx @@ -1500,7 +1500,7 @@ entry: define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { ; SSE2-LABEL: load_sext_4i1_to_4i32: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movzbl (%rdi), %eax +; SSE2-NEXT: movb (%rdi), %al ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: shrb $3, %cl ; SSE2-NEXT: movzbl %cl, %ecx @@ -1528,7 +1528,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { ; ; SSSE3-LABEL: load_sext_4i1_to_4i32: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movzbl (%rdi), %eax +; SSSE3-NEXT: movb (%rdi), %al ; SSSE3-NEXT: movl %eax, %ecx ; SSSE3-NEXT: shrb $3, %cl ; SSSE3-NEXT: movzbl %cl, %ecx @@ -1556,7 +1556,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { ; ; SSE41-LABEL: load_sext_4i1_to_4i32: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: movzbl (%rdi), %eax +; SSE41-NEXT: movb (%rdi), %al ; SSE41-NEXT: movzbl %al, %ecx ; SSE41-NEXT: shrb %al ; SSE41-NEXT: movzbl %al, %eax @@ -1581,7 +1581,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { ; ; AVX1-LABEL: load_sext_4i1_to_4i32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: movzbl (%rdi), %eax +; AVX1-NEXT: movb (%rdi), %al ; AVX1-NEXT: movzbl %al, %ecx ; AVX1-NEXT: shrb %al ; AVX1-NEXT: movzbl %al, %eax @@ -1606,7 +1606,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { ; ; AVX2-LABEL: load_sext_4i1_to_4i32: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movzbl (%rdi), %eax +; AVX2-NEXT: movb (%rdi), %al ; AVX2-NEXT: movzbl %al, %ecx ; AVX2-NEXT: shrb %al ; AVX2-NEXT: movzbl %al, %eax @@ -1650,7 +1650,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { ; X86-SSE2-LABEL: load_sext_4i1_to_4i32: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl (%eax), %eax +; X86-SSE2-NEXT: movb (%eax), %al ; X86-SSE2-NEXT: movl %eax, %ecx ; X86-SSE2-NEXT: shrb $3, %cl ; X86-SSE2-NEXT: movzbl %cl, %ecx @@ -1679,7 +1679,7 @@ define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { ; X86-SSE41-LABEL: load_sext_4i1_to_4i32: ; X86-SSE41: # %bb.0: # %entry ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movzbl (%eax), %eax +; X86-SSE41-NEXT: movb (%eax), %al ; X86-SSE41-NEXT: movl %eax, %ecx ; X86-SSE41-NEXT: shrb %cl ; X86-SSE41-NEXT: movzbl %cl, %ecx @@ -1757,7 +1757,7 @@ entry: define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { ; SSE2-LABEL: load_sext_4i1_to_4i64: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movzbl (%rdi), %eax +; SSE2-NEXT: movb (%rdi), %al ; SSE2-NEXT: movl %eax, %ecx ; SSE2-NEXT: shrb %cl ; SSE2-NEXT: andb $1, %cl @@ -1787,7 +1787,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { ; ; SSSE3-LABEL: load_sext_4i1_to_4i64: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movzbl (%rdi), %eax +; SSSE3-NEXT: movb (%rdi), %al ; SSSE3-NEXT: movl %eax, %ecx ; SSSE3-NEXT: shrb %cl ; SSSE3-NEXT: andb $1, %cl @@ -1817,7 +1817,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { ; ; SSE41-LABEL: load_sext_4i1_to_4i64: ; SSE41: # %bb.0: # %entry -; SSE41-NEXT: movzbl (%rdi), %eax +; SSE41-NEXT: movb (%rdi), %al ; SSE41-NEXT: movl %eax, %ecx ; SSE41-NEXT: shrb %cl ; SSE41-NEXT: andb $1, %cl @@ -1847,7 +1847,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { ; ; AVX1-LABEL: load_sext_4i1_to_4i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: movzbl (%rdi), %eax +; AVX1-NEXT: movb (%rdi), %al ; AVX1-NEXT: movzbl %al, %ecx ; AVX1-NEXT: shrb %al ; AVX1-NEXT: movzbl %al, %eax @@ -1876,7 +1876,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { ; ; AVX2-LABEL: load_sext_4i1_to_4i64: ; AVX2: # %bb.0: # %entry -; AVX2-NEXT: movzbl (%rdi), %eax +; AVX2-NEXT: movb (%rdi), %al ; AVX2-NEXT: movl %eax, %ecx ; AVX2-NEXT: shrb $3, %cl ; AVX2-NEXT: movzbl %cl, %ecx @@ -1921,7 +1921,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { ; X86-SSE2-LABEL: load_sext_4i1_to_4i64: ; X86-SSE2: # %bb.0: # %entry ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE2-NEXT: movzbl (%eax), %eax +; X86-SSE2-NEXT: movb (%eax), %al ; X86-SSE2-NEXT: movl %eax, %ecx ; X86-SSE2-NEXT: shrb %cl ; X86-SSE2-NEXT: andb $1, %cl @@ -1952,7 +1952,7 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { ; X86-SSE41-LABEL: load_sext_4i1_to_4i64: ; X86-SSE41: # %bb.0: # %entry ; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE41-NEXT: movzbl (%eax), %eax +; X86-SSE41-NEXT: movb (%eax), %al ; X86-SSE41-NEXT: movl %eax, %ecx ; X86-SSE41-NEXT: shrb %cl ; X86-SSE41-NEXT: andb $1, %cl diff --git a/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll b/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll index dd61ec629c2f0d..ea1dfa64c60e97 100644 --- a/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll +++ b/llvm/test/CodeGen/X86/volatile-memstores-nooverlapping-load-stores.ll @@ -17,7 +17,7 @@ define dso_local void @copy_7_bytes(ptr noalias nocapture, ptr noalias nocapture define dso_local void @copy_7_bytes_volatile(ptr noalias nocapture, ptr noalias nocapture readonly) nounwind #0 { ; CHECK-LABEL: copy_7_bytes_volatile: ; CHECK: # %bb.0: -; CHECK-NEXT: movzbl 6(%rsi), %eax +; CHECK-NEXT: movb 6(%rsi), %al ; CHECK-NEXT: movb %al, 6(%rdi) ; CHECK-NEXT: movzwl 4(%rsi), %eax ; CHECK-NEXT: movw %ax, 4(%rdi) @@ -35,7 +35,7 @@ define dso_local void @move_7_bytes(ptr nocapture, ptr nocapture readonly) nounw ; CHECK: # %bb.0: ; CHECK-NEXT: movl (%rsi), %eax ; CHECK-NEXT: movzwl 4(%rsi), %ecx -; CHECK-NEXT: movzbl 6(%rsi), %edx +; CHECK-NEXT: movb 6(%rsi), %dl ; CHECK-NEXT: movb %dl, 6(%rdi) ; CHECK-NEXT: movw %cx, 4(%rdi) ; CHECK-NEXT: movl %eax, (%rdi) @@ -48,7 +48,7 @@ define dso_local void @move_7_bytes_volatile(ptr nocapture, ptr nocapture readon ; CHECK: # %bb.0: ; CHECK-NEXT: movl (%rsi), %eax ; CHECK-NEXT: movzwl 4(%rsi), %ecx -; CHECK-NEXT: movzbl 6(%rsi), %edx +; CHECK-NEXT: movb 6(%rsi), %dl ; CHECK-NEXT: movb %dl, 6(%rdi) ; CHECK-NEXT: movw %cx, 4(%rdi) ; CHECK-NEXT: movl %eax, (%rdi) diff --git a/llvm/test/CodeGen/X86/xchg-nofold.ll b/llvm/test/CodeGen/X86/xchg-nofold.ll index 17e7781b21e0b8..c41177c3384688 100644 --- a/llvm/test/CodeGen/X86/xchg-nofold.ll +++ b/llvm/test/CodeGen/X86/xchg-nofold.ll @@ -13,7 +13,7 @@ define zeroext i1 @_Z3fooRSt6atomicIbEb(ptr nocapture dereferenceable(1) %a, i1 ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: movq %rdi, %rcx ; CHECK-NEXT: shrq $3, %rcx -; CHECK-NEXT: movzbl 2147450880(%rcx), %ecx +; CHECK-NEXT: movb 2147450880(%rcx), %cl ; CHECK-NEXT: testb %cl, %cl ; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll index 4adc80b3b8bd66..c3cc7848fc5826 100644 --- a/llvm/test/CodeGen/X86/xmulo.ll +++ b/llvm/test/CodeGen/X86/xmulo.ll @@ -91,7 +91,7 @@ define zeroext i1 @smuloi8(i8 %v1, i8 %v2, ptr %res) { ; WIN32-LABEL: smuloi8: ; WIN32: # %bb.0: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al ; WIN32-NEXT: imulb {{[0-9]+}}(%esp) ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) @@ -322,7 +322,7 @@ define zeroext i1 @umuloi8(i8 %v1, i8 %v2, ptr %res) { ; WIN32-LABEL: umuloi8: ; WIN32: # %bb.0: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al ; WIN32-NEXT: mulb {{[0-9]+}}(%esp) ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) @@ -806,7 +806,7 @@ define zeroext i1 @smulobri8(i8 %v1, i8 %v2) { ; ; WIN32-LABEL: smulobri8: ; WIN32: # %bb.0: -; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al ; WIN32-NEXT: imulb {{[0-9]+}}(%esp) ; WIN32-NEXT: jo LBB15_1 ; WIN32-NEXT: # %bb.2: # %continue @@ -1122,7 +1122,7 @@ define zeroext i1 @umulobri8(i8 %v1, i8 %v2) { ; ; WIN32-LABEL: umulobri8: ; WIN32: # %bb.0: -; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al ; WIN32-NEXT: mulb {{[0-9]+}}(%esp) ; WIN32-NEXT: jo LBB19_1 ; WIN32-NEXT: # %bb.2: # %continue @@ -1425,7 +1425,7 @@ define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { ; ; FAST-LABEL: smuloi8_load: ; FAST: # %bb.0: -; FAST-NEXT: movzbl (%rdi), %eax +; FAST-NEXT: movb (%rdi), %al ; FAST-NEXT: imulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -1446,7 +1446,7 @@ define zeroext i1 @smuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { ; WIN32: # %bb.0: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movzbl (%eax), %eax +; WIN32-NEXT: movb (%eax), %al ; WIN32-NEXT: imulb {{[0-9]+}}(%esp) ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) @@ -1494,7 +1494,7 @@ define zeroext i1 @smuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { ; WIN32-LABEL: smuloi8_load2: ; WIN32: # %bb.0: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: imulb (%ecx) ; WIN32-NEXT: seto %cl @@ -1899,7 +1899,7 @@ define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { ; ; FAST-LABEL: umuloi8_load: ; FAST: # %bb.0: -; FAST-NEXT: movzbl (%rdi), %eax +; FAST-NEXT: movb (%rdi), %al ; FAST-NEXT: mulb %sil ; FAST-NEXT: seto %cl ; FAST-NEXT: movb %al, (%rdx) @@ -1920,7 +1920,7 @@ define zeroext i1 @umuloi8_load(ptr %ptr1, i8 %v2, ptr %res) { ; WIN32: # %bb.0: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax -; WIN32-NEXT: movzbl (%eax), %eax +; WIN32-NEXT: movb (%eax), %al ; WIN32-NEXT: mulb {{[0-9]+}}(%esp) ; WIN32-NEXT: seto %cl ; WIN32-NEXT: movb %al, (%edx) @@ -1968,7 +1968,7 @@ define zeroext i1 @umuloi8_load2(i8 %v1, ptr %ptr2, ptr %res) { ; WIN32-LABEL: umuloi8_load2: ; WIN32: # %bb.0: ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx -; WIN32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movb {{[0-9]+}}(%esp), %al ; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; WIN32-NEXT: mulb (%ecx) ; WIN32-NEXT: seto %cl diff --git a/llvm/test/CodeGen/X86/xor-icmp.ll b/llvm/test/CodeGen/X86/xor-icmp.ll index d1254ada195002..08eb99f02ee7ca 100644 --- a/llvm/test/CodeGen/X86/xor-icmp.ll +++ b/llvm/test/CodeGen/X86/xor-icmp.ll @@ -6,7 +6,7 @@ define i32 @t(i32 %a, i32 %b) nounwind ssp { ; X86-LABEL: t: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al ; X86-NEXT: testb $64, %al ; X86-NEXT: je .LBB0_1 @@ -94,7 +94,7 @@ return: ; preds = %entry define i1 @xor_not_bools(i1 zeroext %x, i1 zeroext %y) nounwind { ; X86-LABEL: xor_not_bools: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb $1, %al ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/xor-lea.ll b/llvm/test/CodeGen/X86/xor-lea.ll index 10e9525a2706a3..8e8ab5625b5583 100644 --- a/llvm/test/CodeGen/X86/xor-lea.ll +++ b/llvm/test/CodeGen/X86/xor-lea.ll @@ -15,7 +15,7 @@ define i8 @xor_sminval_i8(i8 %x) { ; X86-LABEL: xor_sminval_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb $-128, %al ; X86-NEXT: retl ; @@ -33,7 +33,7 @@ define i8 @xor_sminval_i8(i8 %x) { define i8 @xor_notsminval_i8(i8 %x) { ; X86-LABEL: xor_notsminval_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: xorb $127, %al ; X86-NEXT: retl ; @@ -122,7 +122,7 @@ define i64 @xor_sminval_i64(i64 %x) { define i8 @xor_add_sminval_i8(i8 %x, i8 %y) { ; X86-LABEL: xor_add_sminval_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb $-128, %al ; X86-NEXT: retl @@ -204,7 +204,7 @@ define i64 @xor_add_sminval_i64(i64 %x, i64 %y) { define i8 @sub_xor_sminval_i8(i8 %x, i8 %y) { ; X86-LABEL: sub_xor_sminval_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb $-128, %al ; X86-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl @@ -285,7 +285,7 @@ define i64 @add_xor_sminval_i64(i64 %x, i64 %y) { define i8 @xor_shl_sminval_i8(i8 %x) { ; X86-LABEL: xor_shl_sminval_i8: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: addb %al, %al ; X86-NEXT: addb $-128, %al ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/xor-with-overflow.ll b/llvm/test/CodeGen/X86/xor-with-overflow.ll index 5d22302d39add9..96533a7798bd22 100644 --- a/llvm/test/CodeGen/X86/xor-with-overflow.ll +++ b/llvm/test/CodeGen/X86/xor-with-overflow.ll @@ -9,7 +9,7 @@ define i8 @xor_i8_ri(i8 zeroext %0, i8 zeroext %1) { ; X86-LABEL: xor_i8_ri: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorb $-17, %cl ; X86-NEXT: je .LBB0_2 @@ -35,8 +35,8 @@ define i8 @xor_i8_ri(i8 zeroext %0, i8 zeroext %1) { define i8 @xor_i8_rr(i8 zeroext %0, i8 zeroext %1) { ; X86-LABEL: xor_i8_rr: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: xorb %al, %cl ; X86-NEXT: je .LBB1_2 ; X86-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/X86/xor.ll b/llvm/test/CodeGen/X86/xor.ll index c392dd650da9ab..eccae2885edb45 100644 --- a/llvm/test/CodeGen/X86/xor.ll +++ b/llvm/test/CodeGen/X86/xor.ll @@ -192,8 +192,8 @@ bb12: define i8 @test6(i8 %a, i8 %b) nounwind { ; X86-LABEL: test6: ; X86: # %bb.0: # %entry -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %al ; X86-NEXT: .p2align 4, 0x90 ; X86-NEXT: .LBB5_1: # %bb ; X86-NEXT: # =>This Inner Loop Header: Depth=1 @@ -394,7 +394,7 @@ define <4 x i32> @test10(<4 x i32> %a) nounwind { define i32 @PR17487(i1 %tobool) { ; X86-LABEL: PR17487: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: notb %cl ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: testb $1, %cl @@ -425,7 +425,7 @@ define i32 @PR17487(i1 %tobool) { define i32 @test11(i32 %b) { ; X86-LABEL: test11: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NEXT: movl $-2, %eax ; X86-NEXT: roll %cl, %eax ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll index da402d81db9fe9..81a413b32c1a18 100644 --- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll +++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll @@ -55,7 +55,7 @@ define i64 @test3(ptr %data) { ; X86-LABEL: test3: ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: movb (%eax), %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: xorb $60, %al ; X86-NEXT: movzbl %al, %eax @@ -64,7 +64,7 @@ define i64 @test3(ptr %data) { ; ; X64-LABEL: test3: ; X64: # %bb.0: # %entry -; X64-NEXT: movzbl (%rdi), %eax +; X64-NEXT: movb (%rdi), %al ; X64-NEXT: shlb $2, %al ; X64-NEXT: xorb $60, %al ; X64-NEXT: movzbl %al, %eax