diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 4cc7bb9c3b55a9..c581eb7a60aac9 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -716,7 +716,17 @@ inline SpecificInt_match m_SpecificInt(uint64_t V) { inline SpecificInt_match m_Zero() { return m_SpecificInt(0U); } inline SpecificInt_match m_One() { return m_SpecificInt(1U); } -inline SpecificInt_match m_AllOnes() { return m_SpecificInt(~0U); } + +struct AllOnes_match { + + AllOnes_match() = default; + + template bool match(const MatchContext &, SDValue N) { + return isAllOnesOrAllOnesSplat(N); + } +}; + +inline AllOnes_match m_AllOnes() { return AllOnes_match(); } /// Match true boolean value based on the information provided by /// TargetLowering. @@ -766,7 +776,7 @@ inline BinaryOpc_match m_Neg(const ValTy &V) { /// Match a Not as a xor(v, -1) or xor(-1, v) template -inline BinaryOpc_match m_Not(const ValTy &V) { +inline BinaryOpc_match m_Not(const ValTy &V) { return m_Xor(V, m_AllOnes()); } diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll index ae66c5420638bc..f1fd05565c47e9 100644 --- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll +++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll @@ -2384,52 +2384,45 @@ define void @vec384_v2f64(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. define void @vec384_v3i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind { ; SCALAR-LABEL: vec384_v3i8: ; SCALAR: # %bb.0: -; SCALAR-NEXT: movl (%rdi), %ecx -; SCALAR-NEXT: movl %ecx, %eax -; SCALAR-NEXT: shrl $16, %eax -; SCALAR-NEXT: movl %ecx, %edi -; SCALAR-NEXT: shrl $8, %edi +; SCALAR-NEXT: movl (%rdi), %eax +; SCALAR-NEXT: movl %eax, %ecx +; SCALAR-NEXT: shrl $16, %ecx ; SCALAR-NEXT: notb %cl -; SCALAR-NEXT: movzbl %cl, %r8d -; SCALAR-NEXT: notb %dil -; SCALAR-NEXT: movzbl %dil, %ecx -; SCALAR-NEXT: shll $8, %ecx -; SCALAR-NEXT: orl %r8d, %ecx -; SCALAR-NEXT: notb %al -; SCALAR-NEXT: movb %al, 2(%rsi) -; SCALAR-NEXT: movw %cx, (%rsi) -; SCALAR-NEXT: movb %al, 2(%rdx) -; SCALAR-NEXT: movw %cx, (%rdx) -; SCALAR-NEXT: movb %al, 6(%rdx) -; SCALAR-NEXT: movw %cx, 4(%rdx) -; SCALAR-NEXT: movb %al, 10(%rdx) -; SCALAR-NEXT: movw %cx, 8(%rdx) -; SCALAR-NEXT: movb %al, 14(%rdx) -; SCALAR-NEXT: movw %cx, 12(%rdx) -; SCALAR-NEXT: movb %al, 18(%rdx) -; SCALAR-NEXT: movw %cx, 16(%rdx) -; SCALAR-NEXT: movb %al, 22(%rdx) -; SCALAR-NEXT: movw %cx, 20(%rdx) -; SCALAR-NEXT: movb %al, 26(%rdx) -; SCALAR-NEXT: movw %cx, 24(%rdx) -; SCALAR-NEXT: movb %al, 30(%rdx) -; SCALAR-NEXT: movw %cx, 28(%rdx) -; SCALAR-NEXT: movb %al, 34(%rdx) -; SCALAR-NEXT: movw %cx, 32(%rdx) -; SCALAR-NEXT: movb %al, 38(%rdx) -; SCALAR-NEXT: movw %cx, 36(%rdx) -; SCALAR-NEXT: movb %al, 42(%rdx) -; SCALAR-NEXT: movw %cx, 40(%rdx) -; SCALAR-NEXT: movb %al, 46(%rdx) -; SCALAR-NEXT: movw %cx, 44(%rdx) -; SCALAR-NEXT: movb %al, 50(%rdx) -; SCALAR-NEXT: movw %cx, 48(%rdx) -; SCALAR-NEXT: movb %al, 54(%rdx) -; SCALAR-NEXT: movw %cx, 52(%rdx) -; SCALAR-NEXT: movb %al, 58(%rdx) -; SCALAR-NEXT: movw %cx, 56(%rdx) -; SCALAR-NEXT: movb %al, 62(%rdx) -; SCALAR-NEXT: movw %cx, 60(%rdx) +; SCALAR-NEXT: notl %eax +; SCALAR-NEXT: movw %ax, (%rsi) +; SCALAR-NEXT: movb %cl, 2(%rsi) +; SCALAR-NEXT: movb %cl, 2(%rdx) +; SCALAR-NEXT: movw %ax, (%rdx) +; SCALAR-NEXT: movb %cl, 6(%rdx) +; SCALAR-NEXT: movw %ax, 4(%rdx) +; SCALAR-NEXT: movb %cl, 10(%rdx) +; SCALAR-NEXT: movw %ax, 8(%rdx) +; SCALAR-NEXT: movb %cl, 14(%rdx) +; SCALAR-NEXT: movw %ax, 12(%rdx) +; SCALAR-NEXT: movb %cl, 18(%rdx) +; SCALAR-NEXT: movw %ax, 16(%rdx) +; SCALAR-NEXT: movb %cl, 22(%rdx) +; SCALAR-NEXT: movw %ax, 20(%rdx) +; SCALAR-NEXT: movb %cl, 26(%rdx) +; SCALAR-NEXT: movw %ax, 24(%rdx) +; SCALAR-NEXT: movb %cl, 30(%rdx) +; SCALAR-NEXT: movw %ax, 28(%rdx) +; SCALAR-NEXT: movb %cl, 34(%rdx) +; SCALAR-NEXT: movw %ax, 32(%rdx) +; SCALAR-NEXT: movb %cl, 38(%rdx) +; SCALAR-NEXT: movw %ax, 36(%rdx) +; SCALAR-NEXT: movb %cl, 42(%rdx) +; SCALAR-NEXT: movw %ax, 40(%rdx) +; SCALAR-NEXT: movb %cl, 46(%rdx) +; SCALAR-NEXT: movw %ax, 44(%rdx) +; SCALAR-NEXT: movb %cl, 50(%rdx) +; SCALAR-NEXT: movw %ax, 48(%rdx) +; SCALAR-NEXT: movb %cl, 54(%rdx) +; SCALAR-NEXT: movw %ax, 52(%rdx) +; SCALAR-NEXT: movb %cl, 58(%rdx) +; SCALAR-NEXT: movw %ax, 56(%rdx) +; SCALAR-NEXT: movb %cl, 62(%rdx) +; SCALAR-NEXT: movw %ax, 60(%rdx) ; SCALAR-NEXT: retq ; ; SSE2-ONLY-LABEL: vec384_v3i8: @@ -3784,56 +3777,29 @@ define void @vec384_v4f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind { ; SCALAR-LABEL: vec384_v6i8: ; SCALAR: # %bb.0: -; SCALAR-NEXT: movq (%rdi), %rdi -; SCALAR-NEXT: movq %rdi, %rax -; SCALAR-NEXT: shrq $40, %rax -; SCALAR-NEXT: movq %rdi, %rcx +; SCALAR-NEXT: movq (%rdi), %rax +; SCALAR-NEXT: movq %rax, %rcx ; SCALAR-NEXT: shrq $32, %rcx -; SCALAR-NEXT: movl %edi, %r8d -; SCALAR-NEXT: shrl $24, %r8d -; SCALAR-NEXT: movl %edi, %r9d -; SCALAR-NEXT: shrl $16, %r9d -; SCALAR-NEXT: movl %edi, %r10d -; SCALAR-NEXT: shrl $8, %r10d -; SCALAR-NEXT: notb %dil -; SCALAR-NEXT: movzbl %dil, %edi -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movzbl %r10b, %r10d -; SCALAR-NEXT: shll $8, %r10d -; SCALAR-NEXT: orl %edi, %r10d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %edi -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: movzbl %r8b, %r8d -; SCALAR-NEXT: shll $8, %r8d -; SCALAR-NEXT: orl %edi, %r8d -; SCALAR-NEXT: notb %cl -; SCALAR-NEXT: movzbl %cl, %ecx -; SCALAR-NEXT: notb %al -; SCALAR-NEXT: movzbl %al, %eax -; SCALAR-NEXT: shll $8, %eax -; SCALAR-NEXT: orl %ecx, %eax -; SCALAR-NEXT: movw %ax, 4(%rsi) -; SCALAR-NEXT: shll $16, %r8d -; SCALAR-NEXT: movzwl %r10w, %ecx -; SCALAR-NEXT: orl %r8d, %ecx -; SCALAR-NEXT: movl %ecx, (%rsi) -; SCALAR-NEXT: movw %ax, 4(%rdx) -; SCALAR-NEXT: movl %ecx, (%rdx) -; SCALAR-NEXT: movw %ax, 12(%rdx) -; SCALAR-NEXT: movl %ecx, 8(%rdx) -; SCALAR-NEXT: movw %ax, 20(%rdx) -; SCALAR-NEXT: movl %ecx, 16(%rdx) -; SCALAR-NEXT: movw %ax, 28(%rdx) -; SCALAR-NEXT: movl %ecx, 24(%rdx) -; SCALAR-NEXT: movw %ax, 36(%rdx) -; SCALAR-NEXT: movl %ecx, 32(%rdx) -; SCALAR-NEXT: movw %ax, 44(%rdx) -; SCALAR-NEXT: movl %ecx, 40(%rdx) -; SCALAR-NEXT: movw %ax, 52(%rdx) -; SCALAR-NEXT: movl %ecx, 48(%rdx) -; SCALAR-NEXT: movw %ax, 60(%rdx) -; SCALAR-NEXT: movl %ecx, 56(%rdx) +; SCALAR-NEXT: notl %ecx +; SCALAR-NEXT: notl %eax +; SCALAR-NEXT: movl %eax, (%rsi) +; SCALAR-NEXT: movw %cx, 4(%rsi) +; SCALAR-NEXT: movw %cx, 4(%rdx) +; SCALAR-NEXT: movl %eax, (%rdx) +; SCALAR-NEXT: movw %cx, 12(%rdx) +; SCALAR-NEXT: movl %eax, 8(%rdx) +; SCALAR-NEXT: movw %cx, 20(%rdx) +; SCALAR-NEXT: movl %eax, 16(%rdx) +; SCALAR-NEXT: movw %cx, 28(%rdx) +; SCALAR-NEXT: movl %eax, 24(%rdx) +; SCALAR-NEXT: movw %cx, 36(%rdx) +; SCALAR-NEXT: movl %eax, 32(%rdx) +; SCALAR-NEXT: movw %cx, 44(%rdx) +; SCALAR-NEXT: movl %eax, 40(%rdx) +; SCALAR-NEXT: movw %cx, 52(%rdx) +; SCALAR-NEXT: movl %eax, 48(%rdx) +; SCALAR-NEXT: movw %cx, 60(%rdx) +; SCALAR-NEXT: movl %eax, 56(%rdx) ; SCALAR-NEXT: retq ; ; SSE2-ONLY-LABEL: vec384_v6i8: @@ -4062,31 +4028,20 @@ define void @vec384_v6i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.p define void @vec384_v6i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind { ; SCALAR-LABEL: vec384_v6i16: ; SCALAR: # %bb.0: -; SCALAR-NEXT: movl 8(%rdi), %eax -; SCALAR-NEXT: movq (%rdi), %rcx -; SCALAR-NEXT: movq %rcx, %rdi -; SCALAR-NEXT: shrq $32, %rdi -; SCALAR-NEXT: movq %rcx, %r8 -; SCALAR-NEXT: shrq $48, %r8 -; SCALAR-NEXT: notl %r8d -; SCALAR-NEXT: shll $16, %r8d -; SCALAR-NEXT: notl %edi -; SCALAR-NEXT: movzwl %di, %edi -; SCALAR-NEXT: orl %r8d, %edi +; SCALAR-NEXT: movq (%rdi), %rax +; SCALAR-NEXT: movl 8(%rdi), %ecx ; SCALAR-NEXT: notl %ecx -; SCALAR-NEXT: notl %eax -; SCALAR-NEXT: movl %eax, 8(%rsi) -; SCALAR-NEXT: shlq $32, %rdi -; SCALAR-NEXT: orq %rdi, %rcx -; SCALAR-NEXT: movq %rcx, (%rsi) -; SCALAR-NEXT: movl %eax, 8(%rdx) -; SCALAR-NEXT: movq %rcx, (%rdx) -; SCALAR-NEXT: movl %eax, 24(%rdx) -; SCALAR-NEXT: movq %rcx, 16(%rdx) -; SCALAR-NEXT: movl %eax, 40(%rdx) -; SCALAR-NEXT: movq %rcx, 32(%rdx) -; SCALAR-NEXT: movl %eax, 56(%rdx) -; SCALAR-NEXT: movq %rcx, 48(%rdx) +; SCALAR-NEXT: notq %rax +; SCALAR-NEXT: movq %rax, (%rsi) +; SCALAR-NEXT: movl %ecx, 8(%rsi) +; SCALAR-NEXT: movl %ecx, 8(%rdx) +; SCALAR-NEXT: movq %rax, (%rdx) +; SCALAR-NEXT: movl %ecx, 24(%rdx) +; SCALAR-NEXT: movq %rax, 16(%rdx) +; SCALAR-NEXT: movl %ecx, 40(%rdx) +; SCALAR-NEXT: movq %rax, 32(%rdx) +; SCALAR-NEXT: movl %ecx, 56(%rdx) +; SCALAR-NEXT: movq %rax, 48(%rdx) ; SCALAR-NEXT: retq ; ; SSE2-ONLY-LABEL: vec384_v6i16: @@ -4579,95 +4534,20 @@ define void @vec384_v8i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind { ; SCALAR-LABEL: vec384_v12i8: ; SCALAR: # %bb.0: -; SCALAR-NEXT: pushq %rbp -; SCALAR-NEXT: pushq %r15 -; SCALAR-NEXT: pushq %r14 -; SCALAR-NEXT: pushq %r12 -; SCALAR-NEXT: pushq %rbx -; SCALAR-NEXT: movq (%rdi), %r9 -; SCALAR-NEXT: movq 8(%rdi), %rcx -; SCALAR-NEXT: movl %ecx, %eax -; SCALAR-NEXT: shrl $8, %eax -; SCALAR-NEXT: movl %ecx, %edi -; SCALAR-NEXT: shrl $24, %edi -; SCALAR-NEXT: movl %ecx, %r8d -; SCALAR-NEXT: shrl $16, %r8d -; SCALAR-NEXT: movq %r9, %r10 -; SCALAR-NEXT: shrq $40, %r10 -; SCALAR-NEXT: movq %r9, %r11 -; SCALAR-NEXT: shrq $32, %r11 -; SCALAR-NEXT: movq %r9, %rbx -; SCALAR-NEXT: shrq $56, %rbx -; SCALAR-NEXT: movq %r9, %r14 -; SCALAR-NEXT: shrq $48, %r14 -; SCALAR-NEXT: movl %r9d, %ebp -; SCALAR-NEXT: shrl $8, %ebp -; SCALAR-NEXT: movl %r9d, %r15d -; SCALAR-NEXT: shrl $24, %r15d -; SCALAR-NEXT: movl %r9d, %r12d -; SCALAR-NEXT: shrl $16, %r12d -; SCALAR-NEXT: notb %r12b -; SCALAR-NEXT: movzbl %r12b, %r12d -; SCALAR-NEXT: notb %r15b -; SCALAR-NEXT: movzbl %r15b, %r15d -; SCALAR-NEXT: shll $8, %r15d -; SCALAR-NEXT: orl %r12d, %r15d -; SCALAR-NEXT: shll $16, %r15d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: notb %bpl -; SCALAR-NEXT: movzbl %bpl, %ebp -; SCALAR-NEXT: shll $8, %ebp -; SCALAR-NEXT: orl %r9d, %ebp -; SCALAR-NEXT: movzwl %bp, %r9d -; SCALAR-NEXT: orl %r15d, %r9d -; SCALAR-NEXT: notb %r14b -; SCALAR-NEXT: movzbl %r14b, %ebp -; SCALAR-NEXT: notb %bl -; SCALAR-NEXT: movzbl %bl, %ebx -; SCALAR-NEXT: shll $8, %ebx -; SCALAR-NEXT: orl %ebp, %ebx -; SCALAR-NEXT: shll $16, %ebx -; SCALAR-NEXT: notb %r11b -; SCALAR-NEXT: movzbl %r11b, %r11d -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movzbl %r10b, %r10d -; SCALAR-NEXT: shll $8, %r10d -; SCALAR-NEXT: orl %r11d, %r10d -; SCALAR-NEXT: movzwl %r10w, %r10d -; SCALAR-NEXT: orl %ebx, %r10d -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: movzbl %r8b, %r8d -; SCALAR-NEXT: notb %dil -; SCALAR-NEXT: movzbl %dil, %edi -; SCALAR-NEXT: shll $8, %edi -; SCALAR-NEXT: orl %r8d, %edi -; SCALAR-NEXT: shll $16, %edi -; SCALAR-NEXT: notb %cl -; SCALAR-NEXT: movzbl %cl, %ecx -; SCALAR-NEXT: notb %al -; SCALAR-NEXT: movzbl %al, %eax -; SCALAR-NEXT: shll $8, %eax -; SCALAR-NEXT: orl %ecx, %eax -; SCALAR-NEXT: movzwl %ax, %eax -; SCALAR-NEXT: orl %edi, %eax -; SCALAR-NEXT: movl %eax, 8(%rsi) -; SCALAR-NEXT: shlq $32, %r10 -; SCALAR-NEXT: orq %r10, %r9 -; SCALAR-NEXT: movq %r9, (%rsi) -; SCALAR-NEXT: movl %eax, 8(%rdx) -; SCALAR-NEXT: movq %r9, (%rdx) -; SCALAR-NEXT: movl %eax, 24(%rdx) -; SCALAR-NEXT: movq %r9, 16(%rdx) -; SCALAR-NEXT: movl %eax, 40(%rdx) -; SCALAR-NEXT: movq %r9, 32(%rdx) -; SCALAR-NEXT: movl %eax, 56(%rdx) -; SCALAR-NEXT: movq %r9, 48(%rdx) -; SCALAR-NEXT: popq %rbx -; SCALAR-NEXT: popq %r12 -; SCALAR-NEXT: popq %r14 -; SCALAR-NEXT: popq %r15 -; SCALAR-NEXT: popq %rbp +; SCALAR-NEXT: movq (%rdi), %rax +; SCALAR-NEXT: movl 8(%rdi), %ecx +; SCALAR-NEXT: notl %ecx +; SCALAR-NEXT: notq %rax +; SCALAR-NEXT: movq %rax, (%rsi) +; SCALAR-NEXT: movl %ecx, 8(%rsi) +; SCALAR-NEXT: movl %ecx, 8(%rdx) +; SCALAR-NEXT: movq %rax, (%rdx) +; SCALAR-NEXT: movl %ecx, 24(%rdx) +; SCALAR-NEXT: movq %rax, 16(%rdx) +; SCALAR-NEXT: movl %ecx, 40(%rdx) +; SCALAR-NEXT: movq %rax, 32(%rdx) +; SCALAR-NEXT: movl %ecx, 56(%rdx) +; SCALAR-NEXT: movq %rax, 48(%rdx) ; SCALAR-NEXT: retq ; ; SSE2-ONLY-LABEL: vec384_v12i8: @@ -4785,47 +4665,12 @@ define void @vec384_v12i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.ptr) nounwind { ; SCALAR-LABEL: vec384_v12i16: ; SCALAR: # %bb.0: -; SCALAR-NEXT: pushq %r14 -; SCALAR-NEXT: pushq %rbx ; SCALAR-NEXT: movq (%rdi), %rax ; SCALAR-NEXT: movq 8(%rdi), %rcx -; SCALAR-NEXT: movq %rax, %r8 -; SCALAR-NEXT: shrq $32, %r8 -; SCALAR-NEXT: movq %rax, %r9 -; SCALAR-NEXT: shrq $48, %r9 -; SCALAR-NEXT: movq %rcx, %r10 -; SCALAR-NEXT: shrq $32, %r10 -; SCALAR-NEXT: movq %rcx, %r11 -; SCALAR-NEXT: shrq $48, %r11 ; SCALAR-NEXT: movq 16(%rdi), %rdi -; SCALAR-NEXT: movq %rdi, %rbx -; SCALAR-NEXT: shrq $32, %rbx -; SCALAR-NEXT: movq %rdi, %r14 -; SCALAR-NEXT: shrq $48, %r14 -; SCALAR-NEXT: notl %r14d -; SCALAR-NEXT: shll $16, %r14d -; SCALAR-NEXT: notl %ebx -; SCALAR-NEXT: movzwl %bx, %ebx -; SCALAR-NEXT: orl %r14d, %ebx -; SCALAR-NEXT: shlq $32, %rbx -; SCALAR-NEXT: notl %edi -; SCALAR-NEXT: orq %rbx, %rdi -; SCALAR-NEXT: notl %r11d -; SCALAR-NEXT: shll $16, %r11d -; SCALAR-NEXT: notl %r10d -; SCALAR-NEXT: movzwl %r10w, %r10d -; SCALAR-NEXT: orl %r11d, %r10d -; SCALAR-NEXT: shlq $32, %r10 -; SCALAR-NEXT: notl %ecx -; SCALAR-NEXT: orq %r10, %rcx -; SCALAR-NEXT: notl %r9d -; SCALAR-NEXT: shll $16, %r9d -; SCALAR-NEXT: notl %r8d -; SCALAR-NEXT: movzwl %r8w, %r8d -; SCALAR-NEXT: orl %r9d, %r8d -; SCALAR-NEXT: shlq $32, %r8 -; SCALAR-NEXT: notl %eax -; SCALAR-NEXT: orq %r8, %rax +; SCALAR-NEXT: notq %rdi +; SCALAR-NEXT: notq %rcx +; SCALAR-NEXT: notq %rax ; SCALAR-NEXT: movq %rax, (%rsi) ; SCALAR-NEXT: movq %rcx, 8(%rsi) ; SCALAR-NEXT: movq %rdi, 16(%rsi) @@ -4835,8 +4680,6 @@ define void @vec384_v12i16(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec ; SCALAR-NEXT: movq %rdi, 48(%rdx) ; SCALAR-NEXT: movq %rcx, 40(%rdx) ; SCALAR-NEXT: movq %rax, 32(%rdx) -; SCALAR-NEXT: popq %rbx -; SCALAR-NEXT: popq %r14 ; SCALAR-NEXT: retq ; ; SSE2-LABEL: vec384_v12i16: @@ -5085,144 +4928,9 @@ define void @vec384_v24i8(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec. ; SCALAR-NEXT: movq (%rdi), %rax ; SCALAR-NEXT: movq 8(%rdi), %rcx ; SCALAR-NEXT: movq 16(%rdi), %rdi -; SCALAR-NEXT: movq %rdi, %r8 -; SCALAR-NEXT: shrq $40, %r8 -; SCALAR-NEXT: movq %rdi, %r9 -; SCALAR-NEXT: shrq $56, %r9 -; SCALAR-NEXT: movq %rdi, %r10 -; SCALAR-NEXT: shrq $48, %r10 -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movzbl %r10b, %r10d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: shll $8, %r9d -; SCALAR-NEXT: orl %r10d, %r9d -; SCALAR-NEXT: movq %rdi, %r10 -; SCALAR-NEXT: shrq $32, %r10 -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movzbl %r10b, %r10d -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: movzbl %r8b, %r8d -; SCALAR-NEXT: shll $8, %r8d -; SCALAR-NEXT: orl %r10d, %r8d -; SCALAR-NEXT: movl %edi, %r10d -; SCALAR-NEXT: shrl $24, %r10d -; SCALAR-NEXT: shll $16, %r9d -; SCALAR-NEXT: movzwl %r8w, %r8d -; SCALAR-NEXT: orl %r9d, %r8d -; SCALAR-NEXT: movl %edi, %r9d -; SCALAR-NEXT: shrl $16, %r9d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movzbl %r10b, %r10d -; SCALAR-NEXT: shll $8, %r10d -; SCALAR-NEXT: orl %r9d, %r10d -; SCALAR-NEXT: movl %edi, %r9d -; SCALAR-NEXT: shrl $8, %r9d -; SCALAR-NEXT: notb %dil -; SCALAR-NEXT: movzbl %dil, %edi -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r11d -; SCALAR-NEXT: shll $8, %r11d -; SCALAR-NEXT: orl %edi, %r11d -; SCALAR-NEXT: movq %rcx, %r9 -; SCALAR-NEXT: shrq $40, %r9 -; SCALAR-NEXT: shll $16, %r10d -; SCALAR-NEXT: movzwl %r11w, %edi -; SCALAR-NEXT: orl %r10d, %edi -; SCALAR-NEXT: movq %rcx, %r10 -; SCALAR-NEXT: shrq $56, %r10 -; SCALAR-NEXT: shlq $32, %r8 -; SCALAR-NEXT: orq %r8, %rdi -; SCALAR-NEXT: movq %rcx, %r8 -; SCALAR-NEXT: shrq $48, %r8 -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: movzbl %r8b, %r8d -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movzbl %r10b, %r10d -; SCALAR-NEXT: shll $8, %r10d -; SCALAR-NEXT: orl %r8d, %r10d -; SCALAR-NEXT: movq %rcx, %r8 -; SCALAR-NEXT: shrq $32, %r8 -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: movzbl %r8b, %r8d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: shll $8, %r9d -; SCALAR-NEXT: orl %r8d, %r9d -; SCALAR-NEXT: movl %ecx, %r11d -; SCALAR-NEXT: shrl $24, %r11d -; SCALAR-NEXT: shll $16, %r10d -; SCALAR-NEXT: movzwl %r9w, %r8d -; SCALAR-NEXT: orl %r10d, %r8d -; SCALAR-NEXT: movl %ecx, %r9d -; SCALAR-NEXT: shrl $16, %r9d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: notb %r11b -; SCALAR-NEXT: movzbl %r11b, %r10d -; SCALAR-NEXT: shll $8, %r10d -; SCALAR-NEXT: orl %r9d, %r10d -; SCALAR-NEXT: movl %ecx, %r9d -; SCALAR-NEXT: shrl $8, %r9d -; SCALAR-NEXT: notb %cl -; SCALAR-NEXT: movzbl %cl, %ecx -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r11d -; SCALAR-NEXT: shll $8, %r11d -; SCALAR-NEXT: orl %ecx, %r11d -; SCALAR-NEXT: movq %rax, %r9 -; SCALAR-NEXT: shrq $40, %r9 -; SCALAR-NEXT: shll $16, %r10d -; SCALAR-NEXT: movzwl %r11w, %ecx -; SCALAR-NEXT: orl %r10d, %ecx -; SCALAR-NEXT: movq %rax, %r10 -; SCALAR-NEXT: shrq $56, %r10 -; SCALAR-NEXT: shlq $32, %r8 -; SCALAR-NEXT: orq %r8, %rcx -; SCALAR-NEXT: movq %rax, %r8 -; SCALAR-NEXT: shrq $48, %r8 -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: movzbl %r8b, %r8d -; SCALAR-NEXT: notb %r10b -; SCALAR-NEXT: movzbl %r10b, %r10d -; SCALAR-NEXT: shll $8, %r10d -; SCALAR-NEXT: orl %r8d, %r10d -; SCALAR-NEXT: movq %rax, %r8 -; SCALAR-NEXT: shrq $32, %r8 -; SCALAR-NEXT: notb %r8b -; SCALAR-NEXT: movzbl %r8b, %r8d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: shll $8, %r9d -; SCALAR-NEXT: orl %r8d, %r9d -; SCALAR-NEXT: movl %eax, %r11d -; SCALAR-NEXT: shrl $24, %r11d -; SCALAR-NEXT: shll $16, %r10d -; SCALAR-NEXT: movzwl %r9w, %r8d -; SCALAR-NEXT: orl %r10d, %r8d -; SCALAR-NEXT: movl %eax, %r9d -; SCALAR-NEXT: shrl $16, %r9d -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: notb %r11b -; SCALAR-NEXT: movzbl %r11b, %r10d -; SCALAR-NEXT: shll $8, %r10d -; SCALAR-NEXT: orl %r9d, %r10d -; SCALAR-NEXT: movl %eax, %r9d -; SCALAR-NEXT: shrl $8, %r9d -; SCALAR-NEXT: notb %al -; SCALAR-NEXT: movzbl %al, %eax -; SCALAR-NEXT: notb %r9b -; SCALAR-NEXT: movzbl %r9b, %r9d -; SCALAR-NEXT: shll $8, %r9d -; SCALAR-NEXT: orl %eax, %r9d -; SCALAR-NEXT: shll $16, %r10d -; SCALAR-NEXT: movzwl %r9w, %eax -; SCALAR-NEXT: orl %r10d, %eax -; SCALAR-NEXT: shlq $32, %r8 -; SCALAR-NEXT: orq %r8, %rax +; SCALAR-NEXT: notq %rdi +; SCALAR-NEXT: notq %rcx +; SCALAR-NEXT: notq %rax ; SCALAR-NEXT: movq %rax, (%rsi) ; SCALAR-NEXT: movq %rcx, 8(%rsi) ; SCALAR-NEXT: movq %rdi, 16(%rsi)