202 changes: 194 additions & 8 deletions llvm/test/CodeGen/X86/bittest-big-integer.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X64,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=X64,SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=X64,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=X64,AVX,AVX512

@@ -956,6 +956,192 @@ define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind {
ret i1 %cmp
}

; Load hidden behind bitcast
define <8 x i16> @complement_ne_i128_bitcast(ptr %word, i32 %position) nounwind {
; X86-LABEL: complement_ne_i128_bitcast:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $80, %esp
; X86-NEXT: movzbl 16(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %edx
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl 56(%esp,%eax), %esi
; X86-NEXT: movl 60(%esp,%eax), %ebx
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: shldl %cl, %esi, %ebx
; X86-NEXT: movzwl 14(%edx), %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $16, %edi
; X86-NEXT: movzwl 12(%edx), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %ecx, %edi
; X86-NEXT: xorl %ebx, %edi
; X86-NEXT: movl 52(%esp,%eax), %edx
; X86-NEXT: movzbl 16(%ebp), %ecx
; X86-NEXT: shldl %cl, %edx, %esi
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movzwl 10(%eax), %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $16, %ebx
; X86-NEXT: movzwl 8(%eax), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %eax, %ebx
; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: movl (%esp), %eax # 4-byte Reload
; X86-NEXT: movl 48(%esp,%eax), %esi
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movzwl 6(%ecx), %eax
; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
; X86-NEXT: shll $16, %eax
; X86-NEXT: movzwl 4(%ecx), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %ecx, %eax
; X86-NEXT: xorl %edx, %eax
; X86-NEXT: movzbl 16(%ebp), %ecx
; X86-NEXT: shll %cl, %esi
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movzwl 2(%ecx), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shll $16, %edx
; X86-NEXT: movzwl (%ecx), %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: orl %ecx, %edx
; X86-NEXT: xorl %esi, %edx
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl %edi, 12(%ecx)
; X86-NEXT: movl %ebx, 8(%ecx)
; X86-NEXT: movl %eax, 4(%ecx)
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 14(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 10(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 8(%eax)
; X86-NEXT: movl (%esp), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 6(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movw %dx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movw %cx, 2(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movw %cx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
;
; SSE2-LABEL: complement_ne_i128_bitcast:
; SSE2: # %bb.0:
; SSE2-NEXT: movl %esi, %ecx
; SSE2-NEXT: movl $1, %eax
; SSE2-NEXT: xorl %edx, %edx
; SSE2-NEXT: shldq %cl, %rax, %rdx
; SSE2-NEXT: xorl %esi, %esi
; SSE2-NEXT: shlq %cl, %rax
; SSE2-NEXT: testb $64, %cl
; SSE2-NEXT: cmovneq %rax, %rdx
; SSE2-NEXT: cmovneq %rsi, %rax
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: xorq %rdx, 8(%rdi)
; SSE2-NEXT: movq %xmm0, %rcx
; SSE2-NEXT: xorq %rax, %rcx
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: complement_ne_i128_bitcast:
; SSE4: # %bb.0:
; SSE4-NEXT: movl %esi, %ecx
; SSE4-NEXT: movl $1, %eax
; SSE4-NEXT: xorl %edx, %edx
; SSE4-NEXT: shldq %cl, %rax, %rdx
; SSE4-NEXT: shlq %cl, %rax
; SSE4-NEXT: xorl %esi, %esi
; SSE4-NEXT: testb $64, %cl
; SSE4-NEXT: cmovneq %rax, %rdx
; SSE4-NEXT: cmovneq %rsi, %rax
; SSE4-NEXT: movdqa (%rdi), %xmm0
; SSE4-NEXT: movq %xmm0, %rcx
; SSE4-NEXT: xorq %rax, %rcx
; SSE4-NEXT: pextrq $1, %xmm0, %rax
; SSE4-NEXT: xorq %rdx, %rax
; SSE4-NEXT: movq %rax, 8(%rdi)
; SSE4-NEXT: movq %rcx, (%rdi)
; SSE4-NEXT: retq
;
; AVX2-LABEL: complement_ne_i128_bitcast:
; AVX2: # %bb.0:
; AVX2-NEXT: movl %esi, %ecx
; AVX2-NEXT: movl $1, %eax
; AVX2-NEXT: xorl %edx, %edx
; AVX2-NEXT: shldq %cl, %rax, %rdx
; AVX2-NEXT: xorl %esi, %esi
; AVX2-NEXT: shlxq %rcx, %rax, %rax
; AVX2-NEXT: testb $64, %cl
; AVX2-NEXT: cmovneq %rax, %rdx
; AVX2-NEXT: cmovneq %rsi, %rax
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
; AVX2-NEXT: vmovq %xmm0, %rcx
; AVX2-NEXT: vpextrq $1, %xmm0, %rsi
; AVX2-NEXT: xorq %rax, %rcx
; AVX2-NEXT: xorq %rdx, %rsi
; AVX2-NEXT: movq %rsi, 8(%rdi)
; AVX2-NEXT: movq %rcx, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: complement_ne_i128_bitcast:
; AVX512: # %bb.0:
; AVX512-NEXT: movl %esi, %ecx
; AVX512-NEXT: xorl %eax, %eax
; AVX512-NEXT: movl $1, %edx
; AVX512-NEXT: xorl %esi, %esi
; AVX512-NEXT: shldq %cl, %rdx, %rsi
; AVX512-NEXT: shlxq %rcx, %rdx, %rdx
; AVX512-NEXT: testb $64, %cl
; AVX512-NEXT: cmovneq %rdx, %rsi
; AVX512-NEXT: cmovneq %rax, %rdx
; AVX512-NEXT: vmovdqa (%rdi), %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: xorq %rdx, %rax
; AVX512-NEXT: vpextrq $1, %xmm0, %rcx
; AVX512-NEXT: xorq %rsi, %rcx
; AVX512-NEXT: movq %rcx, 8(%rdi)
; AVX512-NEXT: movq %rax, (%rdi)
; AVX512-NEXT: retq
%rem = and i32 %position, 127
%ofs = zext nneg i32 %rem to i128
%bit = shl nuw i128 1, %ofs
%ldv = load <8 x i16>, ptr %word
%ld = bitcast <8 x i16> %ldv to i128
%test = and i128 %ld, %bit
%res = xor i128 %ld, %bit
store i128 %res, ptr %word
ret <8 x i16> %ldv
}

; Multiple loads in store chain
define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
; X86-LABEL: reset_multiload_i128:
@@ -975,10 +1161,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
; X86-NEXT: btrl %edx, %ebx
; X86-NEXT: btl %edx, %edi
; X86-NEXT: movl %ebx, (%ecx,%esi)
; X86-NEXT: jae .LBB22_2
; X86-NEXT: jae .LBB23_2
; X86-NEXT: # %bb.1:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB22_2:
; X86-NEXT: .LBB23_2:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -994,10 +1180,10 @@ define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind {
; X64-NEXT: btrl %esi, %r8d
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: btl %esi, %r9d
; X64-NEXT: jb .LBB22_2
; X64-NEXT: jb .LBB23_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movl (%rdx), %eax
; X64-NEXT: .LBB22_2:
; X64-NEXT: .LBB23_2:
; X64-NEXT: movl %r8d, (%rdi,%rcx)
; X64-NEXT: retq
%rem = and i32 %position, 127
Expand Down Expand Up @@ -1046,10 +1232,10 @@ define i32 @chain_reset_i256(ptr %p0, ptr %p1, ptr %p2, i32 %position) nounwind
; X86-NEXT: movl %edi, (%edx)
; X86-NEXT: movl (%eax), %eax
; X86-NEXT: orl %ecx, %ebp
; X86-NEXT: jne .LBB23_2
; X86-NEXT: jne .LBB24_2
; X86-NEXT: # %bb.1:
; X86-NEXT: addl %esi, %eax
; X86-NEXT: .LBB23_2:
; X86-NEXT: .LBB24_2:
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
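For context, the new `complement_ne_i128_bitcast` test loads a 128-bit word through a `<8 x i16>` vector type, flips the bit selected by `position & 127`, stores the toggled value back, and returns the original vector (the `and` that tests the bit is intentionally left unused). A minimal C sketch of that behavior, assuming a compiler with `unsigned __int128` support; the struct and function names are hypothetical and not part of the patch:

```c
#include <stdint.h>
#include <string.h>

/* Hypothetical scalar model of the complement_ne_i128_bitcast test:
 * load through a 16-bit-element vector type, flip one bit of the
 * 128-bit value in place, return the pre-toggle vector. */
typedef struct { uint16_t lane[8]; } v8i16;   /* stand-in for <8 x i16> */

v8i16 complement_bit_through_bitcast(void *word, unsigned position)
{
    v8i16 original;
    unsigned __int128 as_i128, bit;

    memcpy(&original, word, sizeof original);       /* load <8 x i16>  */
    memcpy(&as_i128, &original, sizeof as_i128);    /* bitcast to i128 */

    bit = (unsigned __int128)1 << (position & 127); /* shl nuw 1, ofs  */
    as_i128 ^= bit;                                 /* xor: flip bit   */

    memcpy(word, &as_i128, sizeof as_i128);         /* store i128      */
    return original;                                /* ret <8 x i16>   */
}
```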