diff --git a/llvm/test/CodeGen/X86/sttni.ll b/llvm/test/CodeGen/X86/sttni.ll index 021fb2ce15a952..53ba7d61fbd518 100644 --- a/llvm/test/CodeGen/X86/sttni.ll +++ b/llvm/test/CodeGen/X86/sttni.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8) @@ -10,13 +10,13 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8>, i8) declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8>, i8) define i1 @pcmpestri_reg_eq_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_reg_eq_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: retl +; X86-LABEL: pcmpestri_reg_eq_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_reg_eq_i8: ; X64: # %bb.0: # %entry @@ -32,13 +32,13 @@ entry: } define i32 @pcmpestri_reg_idx_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_reg_idx_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpestri_reg_idx_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_reg_idx_i8: ; X64: # %bb.0: # %entry @@ -53,31 +53,31 @@ entry: } define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_reg_diff_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 12(%ebp), %edx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: cmpl $16, %ecx -; X32-NEXT: jne .LBB2_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: jmp .LBB2_3 -; X32-NEXT: .LBB2_2: # %compare -; X32-NEXT: movdqa %xmm0, (%esp) -; X32-NEXT: andl $15, %ecx -; X32-NEXT: movb (%esp,%ecx), %al -; X32-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) -; X32-NEXT: subb 16(%esp,%ecx), %al -; X32-NEXT: .LBB2_3: # %exit -; X32-NEXT: movzbl %al, %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X86-LABEL: pcmpestri_reg_diff_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %edx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: cmpl $16, %ecx +; X86-NEXT: jne .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB2_3 +; X86-NEXT: .LBB2_2: # %compare +; X86-NEXT: movdqa %xmm0, (%esp) +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movb (%esp,%ecx), %al +; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) +; X86-NEXT: subb 16(%esp,%ecx), %al +; X86-NEXT: .LBB2_3: # %exit +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_reg_diff_i8: ; X64: # %bb.0: # %entry @@ -117,18 +117,18 @@ exit: } define i1 @pcmpestri_mem_eq_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_mem_eq_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movdqu (%esi), %xmm0 -; X32-NEXT: pcmpestri $24, (%ecx), %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: popl %esi -; X32-NEXT: retl +; X86-LABEL: pcmpestri_mem_eq_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movdqu (%esi), %xmm0 +; X86-NEXT: pcmpestri $24, (%ecx), %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_mem_eq_i8: ; X64: # %bb.0: # %entry @@ -150,18 +150,18 @@ entry: } define i32 @pcmpestri_mem_idx_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_mem_idx_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movdqu (%esi), %xmm0 -; X32-NEXT: pcmpestri $24, (%ecx), %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: popl %esi -; X32-NEXT: retl +; X86-LABEL: pcmpestri_mem_idx_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movdqu (%esi), %xmm0 +; X86-NEXT: pcmpestri $24, (%ecx), %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_mem_idx_i8: ; X64: # %bb.0: # %entry @@ -182,37 +182,37 @@ entry: } define i32 @pcmpestri_mem_diff_i8(i8* %lhs_ptr, i32 %lhs_len, i8* %rhs_ptr, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_mem_diff_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: pushl %esi -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 20(%ebp), %edx -; X32-NEXT: movl 16(%ebp), %ecx -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: movdqu (%esi), %xmm1 -; X32-NEXT: movdqu (%ecx), %xmm0 -; X32-NEXT: pcmpestri $24, %xmm0, %xmm1 -; X32-NEXT: cmpl $16, %ecx -; X32-NEXT: jne .LBB5_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: jmp .LBB5_3 -; X32-NEXT: .LBB5_2: # %compare -; X32-NEXT: movdqa %xmm1, (%esp) -; X32-NEXT: andl $15, %ecx -; X32-NEXT: movb (%esp,%ecx), %al -; X32-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) -; X32-NEXT: subb 16(%esp,%ecx), %al -; X32-NEXT: .LBB5_3: # %exit -; X32-NEXT: movzbl %al, %eax -; X32-NEXT: leal -4(%ebp), %esp -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X86-LABEL: pcmpestri_mem_diff_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %edx +; X86-NEXT: movl 16(%ebp), %ecx +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movdqu (%esi), %xmm1 +; X86-NEXT: movdqu (%ecx), %xmm0 +; X86-NEXT: pcmpestri $24, %xmm0, %xmm1 +; X86-NEXT: cmpl $16, %ecx +; X86-NEXT: jne .LBB5_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB5_3 +; X86-NEXT: .LBB5_2: # %compare +; X86-NEXT: movdqa %xmm1, (%esp) +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movb (%esp,%ecx), %al +; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: subb 16(%esp,%ecx), %al +; X86-NEXT: .LBB5_3: # %exit +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: leal -4(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_mem_diff_i8: ; X64: # %bb.0: # %entry @@ -258,13 +258,13 @@ exit: } define i1 @pcmpestri_reg_eq_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_reg_eq_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: retl +; X86-LABEL: pcmpestri_reg_eq_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_reg_eq_i16: ; X64: # %bb.0: # %entry @@ -282,13 +282,13 @@ entry: } define i32 @pcmpestri_reg_idx_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_reg_idx_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpestri_reg_idx_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_reg_idx_i16: ; X64: # %bb.0: # %entry @@ -305,32 +305,32 @@ entry: } define i32 @pcmpestri_reg_diff_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_reg_diff_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl 8(%ebp), %eax -; X32-NEXT: movl 12(%ebp), %edx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: cmpl $16, %ecx -; X32-NEXT: jne .LBB8_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: jmp .LBB8_3 -; X32-NEXT: .LBB8_2: # %compare -; X32-NEXT: movdqa %xmm0, (%esp) -; X32-NEXT: addl %ecx, %ecx -; X32-NEXT: andl $14, %ecx -; X32-NEXT: movzwl (%esp,%ecx), %eax -; X32-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) -; X32-NEXT: subw 16(%esp,%ecx), %ax -; X32-NEXT: .LBB8_3: # %exit -; X32-NEXT: movzwl %ax, %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X86-LABEL: pcmpestri_reg_diff_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %edx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: cmpl $16, %ecx +; X86-NEXT: jne .LBB8_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB8_3 +; X86-NEXT: .LBB8_2: # %compare +; X86-NEXT: movdqa %xmm0, (%esp) +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: andl $14, %ecx +; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) +; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: .LBB8_3: # %exit +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_reg_diff_i16: ; X64: # %bb.0: # %entry @@ -372,18 +372,18 @@ exit: } define i1 @pcmpestri_mem_eq_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_mem_eq_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movdqu (%esi), %xmm0 -; X32-NEXT: pcmpestri $25, (%ecx), %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: popl %esi -; X32-NEXT: retl +; X86-LABEL: pcmpestri_mem_eq_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movdqu (%esi), %xmm0 +; X86-NEXT: pcmpestri $25, (%ecx), %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_mem_eq_i16: ; X64: # %bb.0: # %entry @@ -407,18 +407,18 @@ entry: } define i32 @pcmpestri_mem_idx_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_mem_idx_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movdqu (%esi), %xmm0 -; X32-NEXT: pcmpestri $25, (%ecx), %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: popl %esi -; X32-NEXT: retl +; X86-LABEL: pcmpestri_mem_idx_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movdqu (%esi), %xmm0 +; X86-NEXT: pcmpestri $25, (%ecx), %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: popl %esi +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_mem_idx_i16: ; X64: # %bb.0: # %entry @@ -441,38 +441,38 @@ entry: } define i32 @pcmpestri_mem_diff_i16(i16* %lhs_ptr, i32 %lhs_len, i16* %rhs_ptr, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_mem_diff_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: pushl %esi -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 20(%ebp), %edx -; X32-NEXT: movl 16(%ebp), %ecx -; X32-NEXT: movl 8(%ebp), %esi -; X32-NEXT: movdqu (%esi), %xmm1 -; X32-NEXT: movdqu (%ecx), %xmm0 -; X32-NEXT: pcmpestri $25, %xmm0, %xmm1 -; X32-NEXT: cmpl $8, %ecx -; X32-NEXT: jne .LBB11_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: jmp .LBB11_3 -; X32-NEXT: .LBB11_2: # %compare -; X32-NEXT: movdqa %xmm1, (%esp) -; X32-NEXT: addl %ecx, %ecx -; X32-NEXT: andl $14, %ecx -; X32-NEXT: movzwl (%esp,%ecx), %eax -; X32-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) -; X32-NEXT: subw 16(%esp,%ecx), %ax -; X32-NEXT: .LBB11_3: # %exit -; X32-NEXT: movzwl %ax, %eax -; X32-NEXT: leal -4(%ebp), %esp -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X86-LABEL: pcmpestri_mem_diff_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl 20(%ebp), %edx +; X86-NEXT: movl 16(%ebp), %ecx +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movdqu (%esi), %xmm1 +; X86-NEXT: movdqu (%ecx), %xmm0 +; X86-NEXT: pcmpestri $25, %xmm0, %xmm1 +; X86-NEXT: cmpl $8, %ecx +; X86-NEXT: jne .LBB11_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB11_3 +; X86-NEXT: .LBB11_2: # %compare +; X86-NEXT: movdqa %xmm1, (%esp) +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: andl $14, %ecx +; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: .LBB11_3: # %exit +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: leal -4(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_mem_diff_i16: ; X64: # %bb.0: # %entry @@ -520,11 +520,11 @@ exit: } define i1 @pcmpistri_reg_eq_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind { -; X32-LABEL: pcmpistri_reg_eq_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: retl +; X86-LABEL: pcmpistri_reg_eq_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_reg_eq_i8: ; X64: # %bb.0: # %entry @@ -538,11 +538,11 @@ entry: } define i32 @pcmpistri_reg_idx_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind { -; X32-LABEL: pcmpistri_reg_idx_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpistri_reg_idx_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_reg_idx_i8: ; X64: # %bb.0: # %entry @@ -555,29 +555,29 @@ entry: } define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind { -; X32-LABEL: pcmpistri_reg_diff_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: cmpl $16, %ecx -; X32-NEXT: jne .LBB14_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: movzbl %al, %eax -; X32-NEXT: retl -; X32-NEXT: .LBB14_2: # %compare -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movdqa %xmm0, (%esp) -; X32-NEXT: andl $15, %ecx -; X32-NEXT: movb (%esp,%ecx), %al -; X32-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) -; X32-NEXT: subb 16(%esp,%ecx), %al -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: movzbl %al, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpistri_reg_diff_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: cmpl $16, %ecx +; X86-NEXT: jne .LBB14_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: retl +; X86-NEXT: .LBB14_2: # %compare +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movdqa %xmm0, (%esp) +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movb (%esp,%ecx), %al +; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) +; X86-NEXT: subb 16(%esp,%ecx), %al +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_reg_diff_i8: ; X64: # %bb.0: # %entry @@ -615,14 +615,14 @@ exit: } define i1 @pcmpistri_mem_eq_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind { -; X32-LABEL: pcmpistri_mem_eq_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movdqu (%ecx), %xmm0 -; X32-NEXT: pcmpistri $24, (%eax), %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: retl +; X86-LABEL: pcmpistri_mem_eq_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movdqu (%ecx), %xmm0 +; X86-NEXT: pcmpistri $24, (%eax), %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_mem_eq_i8: ; X64: # %bb.0: # %entry @@ -641,14 +641,14 @@ entry: } define i32 @pcmpistri_mem_idx_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind { -; X32-LABEL: pcmpistri_mem_idx_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movdqu (%ecx), %xmm0 -; X32-NEXT: pcmpistri $24, (%eax), %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpistri_mem_idx_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movdqu (%ecx), %xmm0 +; X86-NEXT: pcmpistri $24, (%eax), %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_mem_idx_i8: ; X64: # %bb.0: # %entry @@ -666,33 +666,33 @@ entry: } define i32 @pcmpistri_mem_diff_i8(i8* %lhs_ptr, i8* %rhs_ptr) nounwind { -; X32-LABEL: pcmpistri_mem_diff_i8: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 8(%ebp), %ecx -; X32-NEXT: movdqu (%ecx), %xmm1 -; X32-NEXT: movdqu (%eax), %xmm0 -; X32-NEXT: pcmpistri $24, %xmm0, %xmm1 -; X32-NEXT: cmpl $16, %ecx -; X32-NEXT: jne .LBB17_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: jmp .LBB17_3 -; X32-NEXT: .LBB17_2: # %compare -; X32-NEXT: movdqa %xmm1, (%esp) -; X32-NEXT: andl $15, %ecx -; X32-NEXT: movb (%esp,%ecx), %al -; X32-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) -; X32-NEXT: subb 16(%esp,%ecx), %al -; X32-NEXT: .LBB17_3: # %exit -; X32-NEXT: movzbl %al, %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X86-LABEL: pcmpistri_mem_diff_i8: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: movdqu (%ecx), %xmm1 +; X86-NEXT: movdqu (%eax), %xmm0 +; X86-NEXT: pcmpistri $24, %xmm0, %xmm1 +; X86-NEXT: cmpl $16, %ecx +; X86-NEXT: jne .LBB17_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB17_3 +; X86-NEXT: .LBB17_2: # %compare +; X86-NEXT: movdqa %xmm1, (%esp) +; X86-NEXT: andl $15, %ecx +; X86-NEXT: movb (%esp,%ecx), %al +; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: subb 16(%esp,%ecx), %al +; X86-NEXT: .LBB17_3: # %exit +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_mem_diff_i8: ; X64: # %bb.0: # %entry @@ -736,11 +736,11 @@ exit: } define i1 @pcmpistri_reg_eq_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind { -; X32-LABEL: pcmpistri_reg_eq_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: retl +; X86-LABEL: pcmpistri_reg_eq_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_reg_eq_i16: ; X64: # %bb.0: # %entry @@ -756,11 +756,11 @@ entry: } define i32 @pcmpistri_reg_idx_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind { -; X32-LABEL: pcmpistri_reg_idx_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpistri_reg_idx_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_reg_idx_i16: ; X64: # %bb.0: # %entry @@ -775,30 +775,30 @@ entry: } define i32 @pcmpistri_reg_diff_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind { -; X32-LABEL: pcmpistri_reg_diff_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: cmpl $16, %ecx -; X32-NEXT: jne .LBB20_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: movzwl %ax, %eax -; X32-NEXT: retl -; X32-NEXT: .LBB20_2: # %compare -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movdqa %xmm0, (%esp) -; X32-NEXT: addl %ecx, %ecx -; X32-NEXT: andl $14, %ecx -; X32-NEXT: movzwl (%esp,%ecx), %eax -; X32-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) -; X32-NEXT: subw 16(%esp,%ecx), %ax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: movzwl %ax, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpistri_reg_diff_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: cmpl $16, %ecx +; X86-NEXT: jne .LBB20_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: retl +; X86-NEXT: .LBB20_2: # %compare +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movdqa %xmm0, (%esp) +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: andl $14, %ecx +; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) +; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_reg_diff_i16: ; X64: # %bb.0: # %entry @@ -838,14 +838,14 @@ exit: } define i1 @pcmpistri_mem_eq_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind { -; X32-LABEL: pcmpistri_mem_eq_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movdqu (%ecx), %xmm0 -; X32-NEXT: pcmpistri $25, (%eax), %xmm0 -; X32-NEXT: setae %al -; X32-NEXT: retl +; X86-LABEL: pcmpistri_mem_eq_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movdqu (%ecx), %xmm0 +; X86-NEXT: pcmpistri $25, (%eax), %xmm0 +; X86-NEXT: setae %al +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_mem_eq_i16: ; X64: # %bb.0: # %entry @@ -866,14 +866,14 @@ entry: } define i32 @pcmpistri_mem_idx_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind { -; X32-LABEL: pcmpistri_mem_idx_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movdqu (%ecx), %xmm0 -; X32-NEXT: pcmpistri $25, (%eax), %xmm0 -; X32-NEXT: movl %ecx, %eax -; X32-NEXT: retl +; X86-LABEL: pcmpistri_mem_idx_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movdqu (%ecx), %xmm0 +; X86-NEXT: pcmpistri $25, (%eax), %xmm0 +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_mem_idx_i16: ; X64: # %bb.0: # %entry @@ -893,34 +893,34 @@ entry: } define i32 @pcmpistri_mem_diff_i16(i16* %lhs_ptr, i16* %rhs_ptr) nounwind { -; X32-LABEL: pcmpistri_mem_diff_i16: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-16, %esp -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl 12(%ebp), %eax -; X32-NEXT: movl 8(%ebp), %ecx -; X32-NEXT: movdqu (%ecx), %xmm1 -; X32-NEXT: movdqu (%eax), %xmm0 -; X32-NEXT: pcmpistri $25, %xmm0, %xmm1 -; X32-NEXT: cmpl $8, %ecx -; X32-NEXT: jne .LBB23_2 -; X32-NEXT: # %bb.1: -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: jmp .LBB23_3 -; X32-NEXT: .LBB23_2: # %compare -; X32-NEXT: movdqa %xmm1, (%esp) -; X32-NEXT: addl %ecx, %ecx -; X32-NEXT: andl $14, %ecx -; X32-NEXT: movzwl (%esp,%ecx), %eax -; X32-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) -; X32-NEXT: subw 16(%esp,%ecx), %ax -; X32-NEXT: .LBB23_3: # %exit -; X32-NEXT: movzwl %ax, %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X86-LABEL: pcmpistri_mem_diff_i16: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: movdqu (%ecx), %xmm1 +; X86-NEXT: movdqu (%eax), %xmm0 +; X86-NEXT: pcmpistri $25, %xmm0, %xmm1 +; X86-NEXT: cmpl $8, %ecx +; X86-NEXT: jne .LBB23_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: jmp .LBB23_3 +; X86-NEXT: .LBB23_2: # %compare +; X86-NEXT: movdqa %xmm1, (%esp) +; X86-NEXT: addl %ecx, %ecx +; X86-NEXT: andl $14, %ecx +; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) +; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: .LBB23_3: # %exit +; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; ; X64-LABEL: pcmpistri_mem_diff_i16: ; X64: # %bb.0: # %entry @@ -966,24 +966,24 @@ exit: } define void @pcmpestr_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i32* %iptr, i32* %fptr) nounwind { -; X32-LABEL: pcmpestr_index_flag: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: setb %bl -; X32-NEXT: movl %ecx, (%edi) -; X32-NEXT: movl %ebx, (%esi) -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: retl +; X86-LABEL: pcmpestr_index_flag: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: setb %bl +; X86-NEXT: movl %ecx, (%edi) +; X86-NEXT: movl %ebx, (%esi) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl ; ; X64-LABEL: pcmpestr_index_flag: ; X64: # %bb.0: # %entry @@ -1006,22 +1006,22 @@ entry: } define void @pcmpestr_mask_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %fptr) nounwind { -; X32-LABEL: pcmpestr_mask_flag: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: pcmpestrm $24, %xmm1, %xmm0 -; X32-NEXT: setb %bl -; X32-NEXT: movdqa %xmm0, (%esi) -; X32-NEXT: movl %ebx, (%ecx) -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx -; X32-NEXT: retl +; X86-LABEL: pcmpestr_mask_flag: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: pcmpestrm $24, %xmm1, %xmm0 +; X86-NEXT: setb %bl +; X86-NEXT: movdqa %xmm0, (%esi) +; X86-NEXT: movl %ebx, (%ecx) +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl ; ; X64-LABEL: pcmpestr_mask_flag: ; X64: # %bb.0: # %entry @@ -1043,22 +1043,22 @@ entry: } define void @pcmpestr_mask_index(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %iptr) nounwind { -; X32-LABEL: pcmpestr_mask_index: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: movdqa %xmm0, %xmm2 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: pcmpestrm $24, %xmm1, %xmm0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: pcmpestri $24, %xmm1, %xmm2 -; X32-NEXT: movdqa %xmm0, (%edi) -; X32-NEXT: movl %ecx, (%esi) -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: retl +; X86-LABEL: pcmpestr_mask_index: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movdqa %xmm0, %xmm2 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pcmpestrm $24, %xmm1, %xmm0 +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: pcmpestri $24, %xmm1, %xmm2 +; X86-NEXT: movdqa %xmm0, (%edi) +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl ; ; X64-LABEL: pcmpestr_mask_index: ; X64: # %bb.0: # %entry @@ -1081,30 +1081,30 @@ entry: } define void @pcmpestr_mask_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind { -; X32-LABEL: pcmpestr_mask_index_flag: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebp -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi -; X32-NEXT: pushl %esi -; X32-NEXT: movdqa %xmm0, %xmm2 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: pcmpestrm $24, %xmm1, %xmm0 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm2 -; X32-NEXT: setb %bl -; X32-NEXT: movdqa %xmm0, (%ebp) -; X32-NEXT: movl %ecx, (%edi) -; X32-NEXT: movl %ebx, (%esi) -; X32-NEXT: popl %esi -; X32-NEXT: popl %edi -; X32-NEXT: popl %ebx -; X32-NEXT: popl %ebp -; X32-NEXT: retl +; X86-LABEL: pcmpestr_mask_index_flag: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movdqa %xmm0, %xmm2 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pcmpestrm $24, %xmm1, %xmm0 +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm2 +; X86-NEXT: setb %bl +; X86-NEXT: movdqa %xmm0, (%ebp) +; X86-NEXT: movl %ecx, (%edi) +; X86-NEXT: movl %ebx, (%esi) +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl ; ; X64-LABEL: pcmpestr_mask_index_flag: ; X64: # %bb.0: # %entry @@ -1132,18 +1132,18 @@ entry: } define void @pcmpistr_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, i32* %iptr, i32* %fptr) nounwind { -; X32-LABEL: pcmpistr_index_flag: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: setb %bl -; X32-NEXT: movl %ecx, (%edx) -; X32-NEXT: movl %ebx, (%eax) -; X32-NEXT: popl %ebx -; X32-NEXT: retl +; X86-LABEL: pcmpistr_index_flag: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: setb %bl +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: popl %ebx +; X86-NEXT: retl ; ; X64-LABEL: pcmpistr_index_flag: ; X64: # %bb.0: # %entry @@ -1162,16 +1162,16 @@ entry: } define void @pcmpistr_mask_flag(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %fptr) nounwind { -; X32-LABEL: pcmpistr_mask_flag: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: pcmpistrm $24, %xmm1, %xmm0 -; X32-NEXT: setb %dl -; X32-NEXT: movdqa %xmm0, (%ecx) -; X32-NEXT: movl %edx, (%eax) -; X32-NEXT: retl +; X86-LABEL: pcmpistr_mask_flag: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: pcmpistrm $24, %xmm1, %xmm0 +; X86-NEXT: setb %dl +; X86-NEXT: movdqa %xmm0, (%ecx) +; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: retl ; ; X64-LABEL: pcmpistr_mask_flag: ; X64: # %bb.0: # %entry @@ -1190,15 +1190,15 @@ entry: } define void @pcmpistr_mask_index(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %iptr) nounwind { -; X32-LABEL: pcmpistr_mask_index: -; X32: # %bb.0: # %entry -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: pcmpistri $24, %xmm1, %xmm0 -; X32-NEXT: pcmpistrm $24, %xmm1, %xmm0 -; X32-NEXT: movdqa %xmm0, (%edx) -; X32-NEXT: movl %ecx, (%eax) -; X32-NEXT: retl +; X86-LABEL: pcmpistr_mask_index: +; X86: # %bb.0: # %entry +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 +; X86-NEXT: pcmpistrm $24, %xmm1, %xmm0 +; X86-NEXT: movdqa %xmm0, (%edx) +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: retl ; ; X64-LABEL: pcmpistr_mask_index: ; X64: # %bb.0: # %entry @@ -1216,24 +1216,24 @@ entry: } define void @pcmpistr_mask_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind { -; X32-LABEL: pcmpistr_mask_index_flag: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %esi -; X32-NEXT: movdqa %xmm0, %xmm2 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: pcmpistrm $24, %xmm1, %xmm0 -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: pcmpistri $24, %xmm1, %xmm2 -; X32-NEXT: setb %bl -; X32-NEXT: movdqa %xmm0, (%esi) -; X32-NEXT: movl %ecx, (%edx) -; X32-NEXT: movl %ebx, (%eax) -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx -; X32-NEXT: retl +; X86-LABEL: pcmpistr_mask_index_flag: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movdqa %xmm0, %xmm2 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: pcmpistrm $24, %xmm1, %xmm0 +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: pcmpistri $24, %xmm1, %xmm2 +; X86-NEXT: setb %bl +; X86-NEXT: movdqa %xmm0, (%esi) +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl ; ; X64-LABEL: pcmpistr_mask_index_flag: ; X64: # %bb.0: # %entry @@ -1258,26 +1258,26 @@ entry: ; Make sure we don't fold loads when we need to emit pcmpistrm and pcmpistri. define void @pcmpistr_mask_index_flag_load(<16 x i8> %lhs, <16 x i8>* %rhsptr, <16 x i8>* %mptr, i32* %iptr, i32* %fptr) nounwind { -; X32-LABEL: pcmpistr_mask_index_flag_load: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %esi -; X32-NEXT: movdqa %xmm0, %xmm1 -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movdqu (%ecx), %xmm2 -; X32-NEXT: pcmpistrm $24, %xmm2, %xmm0 -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: pcmpistri $24, %xmm2, %xmm1 -; X32-NEXT: setb %bl -; X32-NEXT: movdqa %xmm0, (%esi) -; X32-NEXT: movl %ecx, (%edx) -; X32-NEXT: movl %ebx, (%eax) -; X32-NEXT: popl %esi -; X32-NEXT: popl %ebx -; X32-NEXT: retl +; X86-LABEL: pcmpistr_mask_index_flag_load: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %esi +; X86-NEXT: movdqa %xmm0, %xmm1 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movdqu (%ecx), %xmm2 +; X86-NEXT: pcmpistrm $24, %xmm2, %xmm0 +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: pcmpistri $24, %xmm2, %xmm1 +; X86-NEXT: setb %bl +; X86-NEXT: movdqa %xmm0, (%esi) +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx +; X86-NEXT: retl ; ; X64-LABEL: pcmpistr_mask_index_flag_load: ; X64: # %bb.0: # %entry @@ -1305,19 +1305,19 @@ entry: ; Make sure we don't fold nontemporal loads. define i32 @pcmpestri_nontemporal(<16 x i8> %lhs, i32 %lhs_len, <16 x i8>* %rhsptr, i32 %rhs_len) nounwind { -; X32-LABEL: pcmpestri_nontemporal: -; X32: # %bb.0: # %entry -; X32-NEXT: pushl %ebx -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %edx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movntdqa (%ecx), %xmm1 -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: pcmpestri $24, %xmm1, %xmm0 -; X32-NEXT: setb %bl -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popl %ebx -; X32-NEXT: retl +; X86-LABEL: pcmpestri_nontemporal: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movntdqa (%ecx), %xmm1 +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: pcmpestri $24, %xmm1, %xmm0 +; X86-NEXT: setb %bl +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: popl %ebx +; X86-NEXT: retl ; ; X64-LABEL: pcmpestri_nontemporal: ; X64: # %bb.0: # %entry