Skip to content

Commit

Permalink
[NFC][CodeGen] Add negative test for X u% C == 0 fold (D63391)
Browse files Browse the repository at this point in the history
The fold (D63391) uses multiplicativeInverse(),
but it is not guaranteed to always succeed,
and '100' appears to be one of the problematic values.

llvm-svn: 364578
  • Loading branch information
LebedevRI committed Jun 27, 2019
1 parent c5486b2 commit bd34e50
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 12 deletions.
56 changes: 44 additions & 12 deletions llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
Expand Up @@ -64,6 +64,7 @@ define <4 x i32> @test_urem_even_div(<4 x i32> %X) nounwind readnone {
ret <4 x i32> %ret
}

; Can't fold due to last line
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind readnone {
; CHECK-LABEL: test_urem_pow2:
; CHECK: // %bb.0:
Expand All @@ -89,6 +90,7 @@ define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind readnone {
ret <4 x i32> %ret
}

; Can't fold due to second line
define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
; CHECK-LABEL: test_urem_one:
; CHECK: // %bb.0:
Expand Down Expand Up @@ -121,14 +123,44 @@ define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
ret <4 x i32> %ret
}

; Negative test for the (X u% C) == 0 fold with a non-splat divisor vector.
; Can't fold due to the second element of the divisor vector (100).
; NOTE(review): the commit that added this test reports 100 as a divisor for
; which the fold's multiplicativeInverse() call does not succeed - confirm
; against the fold implementation.
; The expected output below still forms the remainder explicitly
; (umull/umull2 + uzp2, per-lane shifts, then mls) and compares it against
; zero with cmeq, i.e. the fold has not been applied.
define <4 x i32> @test_urem_nomulinv(<4 x i32> %X) nounwind readnone {
; CHECK-LABEL: test_urem_nomulinv:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: adrp x8, .LCPI4_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: neg v1.4s, v1.4s
; CHECK-NEXT: adrp x8, .LCPI4_3
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
; Per-lane unsigned remainder by <6, 100, 12, 14>, compared with zero and
; zero-extended, so each result lane is 1 or 0.
%urem = urem <4 x i32> %X, <i32 6, i32 100, i32 12, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}

define <4 x i32> @test_urem_comp(<4 x i32> %X) nounwind readnone {
; CHECK-LABEL: test_urem_comp:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #52429
; CHECK-NEXT: movk w8, #52428, lsl #16
; CHECK-NEXT: adrp x9, .LCPI4_0
; CHECK-NEXT: adrp x9, .LCPI5_0
; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI4_0]
; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_0]
; CHECK-NEXT: umull2 v4.2d, v0.4s, v2.4s
; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s
; CHECK-NEXT: uzp2 v2.4s, v2.4s, v4.4s
Expand All @@ -148,12 +180,12 @@ define <4 x i32> @test_urem_comp(<4 x i32> %X) nounwind readnone {
define <4 x i32> @test_urem_both(<4 x i32> %X) nounwind readnone {
; CHECK-LABEL: test_urem_both:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: adrp x8, .LCPI5_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
; CHECK-NEXT: adrp x8, .LCPI5_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: adrp x8, .LCPI6_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_1]
; CHECK-NEXT: adrp x8, .LCPI6_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI6_2]
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
Expand Down Expand Up @@ -216,10 +248,10 @@ define <4 x i32> @test_urem_both_undef(<4 x i32> %X) nounwind readnone {
define <4 x i32> @test_urem_div_even_odd(<4 x i32> %X) nounwind readnone {
; CHECK-LABEL: test_urem_div_even_odd:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI9_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0]
; CHECK-NEXT: adrp x8, .LCPI9_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_1]
; CHECK-NEXT: adrp x8, .LCPI10_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0]
; CHECK-NEXT: adrp x8, .LCPI10_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI10_1]
; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/AArch64/urem-seteq.ll
Expand Up @@ -137,6 +137,26 @@ define i32 @test_urem_one(i32 %X) nounwind readnone {
ret i32 %ret
}

; Negative test: we should not proceed with the (X u% C) == 0 fold if we
; cannot compute the multiplicative inverse it needs.
; NOTE(review): the commit that added this test reports 100 as one such
; divisor - confirm against the fold implementation.
; The expected output materialises the constant 0x51EB851F (mov + movk),
; takes the widened product shifted right by 37, multiplies that by 100 and
; subtracts it from X (msub), then compares the result with zero.
define i32 @test_urem_100(i32 %X) nounwind readnone {
; CHECK-LABEL: test_urem_100:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #34079
; CHECK-NEXT: movk w8, #20971, lsl #16
; CHECK-NEXT: umull x8, w0, w8
; CHECK-NEXT: lsr x8, x8, #37
; CHECK-NEXT: mov w9, #100
; CHECK-NEXT: msub w8, w8, w9, w0
; CHECK-NEXT: cmp w8, #0 // =0
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
; Returns 1 when X is a multiple of 100 and 0 otherwise.
%urem = urem i32 %X, 100
%cmp = icmp eq i32 %urem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
}

; We can lower remainder of division by powers of two much better elsewhere;
; also, BuildREMEqFold does not work when the only odd factor of the divisor is 1.
; This ensures we don't touch powers of two.
Expand Down
130 changes: 130 additions & 0 deletions llvm/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
Expand Up @@ -259,6 +259,7 @@ define <4 x i32> @test_urem_even_div(<4 x i32> %X) nounwind readnone {
ret <4 x i32> %ret
}

; Can't fold due to last line
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_pow2:
; CHECK-SSE2: # %bb.0:
Expand Down Expand Up @@ -373,6 +374,7 @@ define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind readnone {
ret <4 x i32> %ret
}

; Can't fold due to second line
define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_one:
; CHECK-SSE2: # %bb.0:
Expand Down Expand Up @@ -499,6 +501,134 @@ define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
ret <4 x i32> %ret
}

; Negative test for the (X u% C) == 0 fold with a non-splat divisor vector.
; Can't fold due to the second element of the divisor vector (100).
; NOTE(review): the commit that added this test reports 100 as a divisor for
; which the fold's multiplicativeInverse() call does not succeed - confirm
; against the fold implementation.
; All five check-prefix variants below (SSE2, SSE4.1, AVX1, AVX2, AVX512VL)
; multiply by the constant vector [2863311531,1374389535,2863311531,2454267027],
; shift, multiply back by the divisors, subtract from X and compare against
; zero, i.e. the fold has not been applied.
define <4 x i32> @test_urem_nomulinv(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_nomulinv:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,1374389535,2863311531,2454267027]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
; CHECK-SSE2-NEXT: psrld $1, %xmm3
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm3[3,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
; CHECK-SSE2-NEXT: psrld $2, %xmm1
; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
; CHECK-SSE2-NEXT: psrld $5, %xmm3
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [6,100,12,14]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
; CHECK-SSE2-NEXT: psrld $3, %xmm2
; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm2[2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_nomulinv:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2863311531,1374389535,2863311531,2454267027]
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm4
; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm1
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
; CHECK-SSE41-NEXT: psrld $2, %xmm2
; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm3
; CHECK-SSE41-NEXT: psrld $5, %xmm3
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; CHECK-SSE41-NEXT: psrld $3, %xmm1
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_nomulinv:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2863311531,1374389535,2863311531,2454267027]
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm2
; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm3
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
; CHECK-AVX1-NEXT: vpsrld $3, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_nomulinv:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,1374389535,2863311531,2454267027]
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_nomulinv:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,1374389535,2863311531,2454267027]
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
; Per-lane unsigned remainder by <6, 100, 12, 14>, compared with zero and
; zero-extended, so each result lane is 1 or 0.
%urem = urem <4 x i32> %X, <i32 6, i32 100, i32 12, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}

define <4 x i32> @test_urem_comp(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_comp:
; CHECK-SSE2: # %bb.0:
Expand Down
32 changes: 32 additions & 0 deletions llvm/test/CodeGen/X86/urem-seteq.ll
Expand Up @@ -223,6 +223,38 @@ define i32 @test_urem_one(i32 %X) nounwind readnone {
ret i32 %ret
}

; Negative test: we should not proceed with the (X u% C) == 0 fold if we
; cannot compute the multiplicative inverse it needs.
; NOTE(review): the commit that added this test reports 100 as one such
; divisor - confirm against the fold implementation.
; Both the 32-bit and 64-bit expectations below multiply X by the constant
; 0x51EB851F, shift the product right, multiply the result by 100 and compare
; it with the original X (cmpl + sete).
define i32 @test_urem_100(i32 %X) nounwind readnone {
; X86-LABEL: test_urem_100:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $1374389535, %edx # imm = 0x51EB851F
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: mull %edx
; X86-NEXT: shrl $5, %edx
; X86-NEXT: imull $100, %edx, %edx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl %edx, %ecx
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: test_urem_100:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: imulq $1374389535, %rax, %rax # imm = 0x51EB851F
; X64-NEXT: shrq $37, %rax
; X64-NEXT: imull $100, %eax, %ecx
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl %ecx, %edi
; X64-NEXT: sete %al
; X64-NEXT: retq
; Returns 1 when X is a multiple of 100 and 0 otherwise.
%urem = urem i32 %X, 100
%cmp = icmp eq i32 %urem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
}

; We can lower remainder of division by powers of two much better elsewhere;
; also, BuildREMEqFold does not work when the only odd factor of the divisor is 1.
; This ensures we don't touch powers of two.
Expand Down

0 comments on commit bd34e50

Please sign in to comment.