21 changes: 12 additions & 9 deletions llvm/test/CodeGen/X86/avx512-broadcast-unfold.ll
Original file line number Diff line number Diff line change
Expand Up @@ -210,12 +210,13 @@ define void @bcast_unfold_mul_v16i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_mul_v16i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB6_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vpmulld 4096(%rdi,%rax), %zmm0, %zmm1
; CHECK-NEXT: vmovdqu64 %zmm1, 4096(%rdi,%rax)
; CHECK-NEXT: vmovdqu64 4096(%rdi,%rax), %zmm0
; CHECK-NEXT: vpaddd %zmm0, %zmm0, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vmovdqu64 %zmm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $64, %rax
; CHECK-NEXT: jne .LBB6_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -244,12 +245,13 @@ define void @bcast_unfold_mul_v8i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_mul_v8i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm0 = [3,3,3,3,3,3,3,3]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB7_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vpmulld 4096(%rdi,%rax), %ymm0, %ymm1
; CHECK-NEXT: vmovdqu %ymm1, 4096(%rdi,%rax)
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %ymm0
; CHECK-NEXT: vpaddd %ymm0, %ymm0, %ymm1
; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vmovdqu %ymm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $32, %rax
; CHECK-NEXT: jne .LBB7_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down Expand Up @@ -278,12 +280,13 @@ define void @bcast_unfold_mul_v4i32(i32* %arg) {
; CHECK-LABEL: bcast_unfold_mul_v4i32:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm0 = [3,3,3,3]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB8_1: # %bb2
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vpmulld 4096(%rdi,%rax), %xmm0, %xmm1
; CHECK-NEXT: vmovdqu %xmm1, 4096(%rdi,%rax)
; CHECK-NEXT: vmovdqu 4096(%rdi,%rax), %xmm0
; CHECK-NEXT: vpaddd %xmm0, %xmm0, %xmm1
; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vmovdqu %xmm0, 4096(%rdi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: jne .LBB8_1
; CHECK-NEXT: # %bb.2: # %bb10
Expand Down
15 changes: 9 additions & 6 deletions llvm/test/CodeGen/X86/combine-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,16 @@ define void @PR52039(ptr %pa, ptr %pb) {
; SSE-NEXT: movdqa %xmm2, %xmm3
; SSE-NEXT: psubd %xmm1, %xmm3
; SSE-NEXT: psubd %xmm0, %xmm2
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [3,3,3,3]
; SSE-NEXT: movdqu %xmm2, (%rsi)
; SSE-NEXT: pmulld %xmm0, %xmm2
; SSE-NEXT: pmulld %xmm3, %xmm0
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: movdqu %xmm3, 16(%rsi)
; SSE-NEXT: movdqu %xmm0, 16(%rdi)
; SSE-NEXT: movdqu %xmm2, (%rdi)
; SSE-NEXT: movdqu %xmm2, (%rsi)
; SSE-NEXT: movdqu %xmm1, 16(%rdi)
; SSE-NEXT: movdqu %xmm0, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: PR52039:
Expand Down
12 changes: 8 additions & 4 deletions llvm/test/CodeGen/X86/rotate-extract-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,16 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind {
; X86-LABEL: vrolw_extract_mul_with_mask:
; X86: # %bb.0:
; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-NEXT: vpslld $3, %xmm0, %xmm1
; X86-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; X86-NEXT: vprold $7, %xmm0, %xmm0
; X86-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: vrolw_extract_mul_with_mask:
; X64: # %bb.0:
; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-NEXT: vpslld $3, %xmm0, %xmm1
; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; X64-NEXT: vprold $7, %xmm0, %xmm0
; X64-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-NEXT: retq
Expand Down Expand Up @@ -194,15 +196,17 @@ define <8 x i32> @no_extract_mul(<8 x i32> %i) nounwind {
; X86-LABEL: no_extract_mul:
; X86: # %bb.0:
; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm1
; X86-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-NEXT: vpslld $3, %ymm0, %ymm2
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X86-NEXT: vpsrld $23, %ymm0, %ymm0
; X86-NEXT: vpor %ymm0, %ymm1, %ymm0
; X86-NEXT: retl
;
; X64-LABEL: no_extract_mul:
; X64: # %bb.0:
; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-NEXT: vpslld $3, %ymm0, %ymm2
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X64-NEXT: vpsrld $23, %ymm0, %ymm0
; X64-NEXT: vpor %ymm0, %ymm1, %ymm0
; X64-NEXT: retq
Expand Down
28 changes: 17 additions & 11 deletions llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -487,8 +487,10 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; SSE41-NEXT: psrld $31, %xmm1
; SSE41-NEXT: psrad $2, %xmm2
; SSE41-NEXT: paddd %xmm1, %xmm2
; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE41-NEXT: psubd %xmm2, %xmm0
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: pslld $3, %xmm1
; SSE41-NEXT: psubd %xmm1, %xmm2
; SSE41-NEXT: paddd %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_rem7_4i32:
Expand All @@ -503,8 +505,9 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
; AVX1-NEXT: vpsrad $2, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $3, %xmm1, %xmm2
; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_rem7_4i32:
Expand All @@ -519,9 +522,9 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
; AVX2-NEXT: vpsrad $2, %xmm1, %xmm1
; AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpslld $3, %xmm1, %xmm2
; AVX2-NEXT: vpsubd %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%res = srem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %res
Expand All @@ -536,8 +539,10 @@ define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind {
; SSE-NEXT: psrlw $15, %xmm2
; SSE-NEXT: psraw $1, %xmm1
; SSE-NEXT: paddw %xmm2, %xmm1
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: psubw %xmm1, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psllw $3, %xmm2
; SSE-NEXT: psubw %xmm2, %xmm1
; SSE-NEXT: paddw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_rem7_8i16:
Expand All @@ -546,8 +551,9 @@ define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind {
; AVX-NEXT: vpsrlw $15, %xmm1, %xmm2
; AVX-NEXT: vpsraw $1, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsllw $3, %xmm1, %xmm2
; AVX-NEXT: vpsubw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%res = srem <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %res
Expand Down
11 changes: 6 additions & 5 deletions llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -497,9 +497,9 @@ define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
; AVX2-NEXT: vpsrld $31, %ymm1, %ymm2
; AVX2-NEXT: vpsrad $2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7]
; AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpslld $3, %ymm1, %ymm2
; AVX2-NEXT: vpsubd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%res = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <8 x i32> %res
Expand Down Expand Up @@ -533,8 +533,9 @@ define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind {
; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm2
; AVX2-NEXT: vpsraw $1, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $3, %ymm1, %ymm2
; AVX2-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%res = srem <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %res
Expand Down
10 changes: 6 additions & 4 deletions llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -400,8 +400,9 @@ define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
; AVX-NEXT: vpsrld $31, %zmm1, %zmm2
; AVX-NEXT: vpsrad $2, %zmm1, %zmm1
; AVX-NEXT: vpaddd %zmm2, %zmm1, %zmm1
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; AVX-NEXT: vpslld $3, %zmm1, %zmm2
; AVX-NEXT: vpsubd %zmm2, %zmm1, %zmm1
; AVX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX-NEXT: retq
%res = srem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <16 x i32> %res
Expand Down Expand Up @@ -435,8 +436,9 @@ define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $15, %zmm1, %zmm2
; AVX512BW-NEXT: vpsraw $1, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
; AVX512BW-NEXT: vpsubw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = srem <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <32 x i16> %res
Expand Down
28 changes: 17 additions & 11 deletions llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -558,8 +558,10 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; SSE41-NEXT: psrld $1, %xmm1
; SSE41-NEXT: paddd %xmm2, %xmm1
; SSE41-NEXT: psrld $2, %xmm1
; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm1, %xmm2
; SSE41-NEXT: pslld $3, %xmm2
; SSE41-NEXT: psubd %xmm2, %xmm1
; SSE41-NEXT: paddd %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test_rem7_4i32:
Expand All @@ -574,8 +576,9 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm2
; AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpslld $3, %xmm1, %xmm2
; AVX1-NEXT: vpsubd %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_rem7_4i32:
Expand All @@ -590,9 +593,9 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; AVX2-NEXT: vpsrld $1, %xmm2, %xmm2
; AVX2-NEXT: vpaddd %xmm1, %xmm2, %xmm1
; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpslld $3, %xmm1, %xmm2
; AVX2-NEXT: vpsubd %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
%res = urem <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
ret <4 x i32> %res
Expand All @@ -608,8 +611,10 @@ define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind {
; SSE-NEXT: psrlw $1, %xmm2
; SSE-NEXT: paddw %xmm1, %xmm2
; SSE-NEXT: psrlw $2, %xmm2
; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT: psubw %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: psllw $3, %xmm1
; SSE-NEXT: psubw %xmm1, %xmm2
; SSE-NEXT: paddw %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_rem7_8i16:
Expand All @@ -619,8 +624,9 @@ define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind {
; AVX-NEXT: vpsrlw $1, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm1, %xmm2, %xmm1
; AVX-NEXT: vpsrlw $2, %xmm1, %xmm1
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpsllw $3, %xmm1, %xmm2
; AVX-NEXT: vpsubw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%res = urem <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <8 x i16> %res
Expand Down
11 changes: 6 additions & 5 deletions llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll
Original file line number Diff line number Diff line change
Expand Up @@ -528,9 +528,9 @@ define <8 x i32> @test_rem7_8i32(<8 x i32> %a) nounwind {
; AVX2-NEXT: vpsrld $1, %ymm2, %ymm2
; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrld $2, %ymm1, %ymm1
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [7,7,7,7,7,7,7,7]
; AVX2-NEXT: vpmulld %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpslld $3, %ymm1, %ymm2
; AVX2-NEXT: vpsubd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%res = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <8 x i32> %res
Expand Down Expand Up @@ -567,8 +567,9 @@ define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind {
; AVX2-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX2-NEXT: vpaddw %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vpsrlw $2, %ymm1, %ymm1
; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpsllw $3, %ymm1, %ymm2
; AVX2-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
%res = urem <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <16 x i16> %res
Expand Down
10 changes: 6 additions & 4 deletions llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,9 @@ define <16 x i32> @test_rem7_16i32(<16 x i32> %a) nounwind {
; AVX-NEXT: vpsrld $1, %zmm1, %zmm1
; AVX-NEXT: vpaddd %zmm3, %zmm1, %zmm1
; AVX-NEXT: vpsrld $2, %zmm1, %zmm1
; AVX-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1
; AVX-NEXT: vpsubd %zmm1, %zmm0, %zmm0
; AVX-NEXT: vpslld $3, %zmm1, %zmm2
; AVX-NEXT: vpsubd %zmm2, %zmm1, %zmm1
; AVX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX-NEXT: retq
%res = urem <16 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
ret <16 x i32> %res
Expand Down Expand Up @@ -458,8 +459,9 @@ define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind {
; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm2
; AVX512BW-NEXT: vpaddw %zmm1, %zmm2, %zmm1
; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm1
; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpsllw $3, %zmm1, %zmm2
; AVX512BW-NEXT: vpsubw %zmm2, %zmm1, %zmm1
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retq
%res = urem <32 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
ret <32 x i16> %res
Expand Down
357 changes: 129 additions & 228 deletions llvm/test/CodeGen/X86/vector-mul.ll

Large diffs are not rendered by default.

163 changes: 163 additions & 0 deletions llvm/test/Transforms/InstCombine/icmp-equality-rotate.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i8 @llvm.fshr.i8(i8, i8, i8)
declare void @use.i8(i8)
; Positive test: %x rotated right by %C (fshr with both data operands equal
; to %x) is compared for equality against %x itself. The expected output
; replaces the fshr with an fshl of %x by the same amount %C and keeps the
; 'icmp eq' against %x.
define i1 @cmpeq_rorr_to_rorl(i8 %x, i8 %C) {
; CHECK-LABEL: @cmpeq_rorr_to_rorl(
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[C:%.*]])
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], [[X]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C)
%r = icmp eq i8 %x_rorr, %x
ret i1 %r
}

; Negative test: same rotate-right of %x compared against %x, but with the
; relational predicate 'ult' instead of an equality predicate. The expected
; output keeps the fshr and the compare unchanged.
define i1 @cmpeq_rorr_to_rorl_non_equality_fail(i8 %x, i8 %C) {
; CHECK-LABEL: @cmpeq_rorr_to_rorl_non_equality_fail(
; CHECK-NEXT: [[X_RORR:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[C:%.*]])
; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X_RORR]], [[X]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C)
%r = icmp ult i8 %x_rorr, %x
ret i1 %r
}

; Negative test: the rotate-right of %x is compared against the rotate
; amount %C rather than against %x. The expected output is unchanged.
; NOTE(review): the predicate here is 'ult', not 'eq', so this case differs
; from the positive test in two ways at once (compared value and predicate).
; Confirm whether an 'icmp eq' variant was intended to isolate the
; wrong-value condition.
define i1 @cmpeq_rorr_to_rorl_cmp_against_wrong_val_fail(i8 %x, i8 %C) {
; CHECK-LABEL: @cmpeq_rorr_to_rorl_cmp_against_wrong_val_fail(
; CHECK-NEXT: [[X_RORR:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[C:%.*]])
; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X_RORR]], [[C]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C)
%r = icmp ult i8 %x_rorr, %C
ret i1 %r
}

; Negative test: the fshr takes two different data operands (%x and %C), so
; it is a general funnel shift rather than a rotate of a single value. The
; expected output is unchanged.
; NOTE(review): the predicate here is 'ult', not 'eq', so the non-equality
; precondition is also violated. Confirm whether an 'icmp eq' variant was
; intended to isolate the non-rotate condition.
define i1 @cmpeq_rorr_to_rorl_non_ror_fail(i8 %x, i8 %C) {
; CHECK-LABEL: @cmpeq_rorr_to_rorl_non_ror_fail(
; CHECK-NEXT: [[X_RORR:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[C:%.*]], i8 [[C]])
; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[X_RORR]], [[X]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr = call i8 @llvm.fshr.i8(i8 %x, i8 %C, i8 %C)
%r = icmp ult i8 %x_rorr, %x
ret i1 %r
}

; Negative test: same pattern as the positive rotate-vs-%x equality test,
; except that the rotate result has a second user (the call to @use.i8).
; The expected output keeps the original fshr, the call, and the compare.
define i1 @cmpeq_rorr_to_rorl_multiuse_fail(i8 %x, i8 %C) {
; CHECK-LABEL: @cmpeq_rorr_to_rorl_multiuse_fail(
; CHECK-NEXT: [[X_RORR:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[C:%.*]])
; CHECK-NEXT: call void @use.i8(i8 [[X_RORR]])
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X_RORR]], [[X]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C)
call void @use.i8(i8 %x_rorr)
%r = icmp eq i8 %x_rorr, %x
ret i1 %r
}

; Positive test: two rotate-rights of the same value %x, by %C0 and %C1, are
; compared with 'icmp ne'. The expected output computes %C0 - %C1, performs
; a single fshl rotate of %x by that difference, and compares the result
; against the unrotated %x.
define i1 @cmpne_rorr_rorr(i8 %x, i8 %C0, i8 %C1) {
; CHECK-LABEL: @cmpne_rorr_rorr(
; CHECK-NEXT: [[TMP1:%.*]] = sub i8 [[C0:%.*]], [[C1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], [[X]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr0 = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C0)
%x_rorr1 = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C1)
%r = icmp ne i8 %x_rorr0, %x_rorr1
ret i1 %r
}

; Positive test: rotate-rights of two different values (%x by %C0, %y by
; %C1) are compared with 'icmp ne'. The expected output computes %C0 - %C1,
; rotates only %x right (fshr) by that difference, and compares the result
; against the unrotated %y.
define i1 @cmpne_rorrX_rorrY(i8 %x, i8 %y, i8 %C0, i8 %C1) {
; CHECK-LABEL: @cmpne_rorrX_rorrY(
; CHECK-NEXT: [[TMP1:%.*]] = sub i8 [[C0:%.*]], [[C1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr0 = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C0)
%y_rorr1 = call i8 @llvm.fshr.i8(i8 %y, i8 %y, i8 %C1)
%r = icmp ne i8 %x_rorr0, %y_rorr1
ret i1 %r
}


; Negative test: two rotate-rights of %x by %C0 and %C1 are compared with
; the signed relational predicate 'sge' instead of an equality predicate.
; The expected output keeps both rotates and the compare unchanged.
define i1 @cmpne_rorr_rorr_non_equality_fail(i8 %x, i8 %C0, i8 %C1) {
; CHECK-LABEL: @cmpne_rorr_rorr_non_equality_fail(
; CHECK-NEXT: [[X_RORR0:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[C0:%.*]])
; CHECK-NEXT: [[X_RORR1:%.*]] = call i8 @llvm.fshr.i8(i8 [[X]], i8 [[X]], i8 [[C1:%.*]])
; CHECK-NEXT: [[R:%.*]] = icmp sge i8 [[X_RORR0]], [[X_RORR1]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr0 = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C0)
%x_rorr1 = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C1)
%r = icmp sge i8 %x_rorr0, %x_rorr1
ret i1 %r
}


; Currently-unchanged case: a rotate-right of %x by the variable amount %C0
; is compared ('icmp ne') with a rotate-left of %x by the constant 3, so both
; the rotate direction and the kind of amount (variable vs. constant) differ.
; The expected output keeps both rotates and the compare as written.
; NOTE(review): the "todo" in the name presumably marks this as a fold that
; could be supported later - confirm against the transform's source.
define i1 @cmpne_rorr_rorl_todo_mismatch_C(i8 %x, i8 %C0, i8 %C1) {
; CHECK-LABEL: @cmpne_rorr_rorl_todo_mismatch_C(
; CHECK-NEXT: [[X_RORR0:%.*]] = call i8 @llvm.fshr.i8(i8 [[X:%.*]], i8 [[X]], i8 [[C0:%.*]])
; CHECK-NEXT: [[X_RORL1:%.*]] = call i8 @llvm.fshl.i8(i8 [[X]], i8 [[X]], i8 3)
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X_RORR0]], [[X_RORL1]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorr0 = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 %C0)
%x_rorl1 = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
%r = icmp ne i8 %x_rorr0, %x_rorl1
ret i1 %r
}

; Negative test: two rotate-lefts of %x, one by the variable amount %C0 and
; one by the constant 3, are compared with 'icmp ne'. The constant-amount
; rotate has an additional user (the call to @use.i8). The expected output
; keeps both rotates, the call, and the compare unchanged.
define i1 @cmpne_rorl_rorl_multiuse1_fail(i8 %x, i8 %C0) {
; CHECK-LABEL: @cmpne_rorl_rorl_multiuse1_fail(
; CHECK-NEXT: [[X_RORL0:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[C0:%.*]])
; CHECK-NEXT: [[X_RORL1:%.*]] = call i8 @llvm.fshl.i8(i8 [[X]], i8 [[X]], i8 3)
; CHECK-NEXT: call void @use.i8(i8 [[X_RORL1]])
; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X_RORL0]], [[X_RORL1]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorl0 = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %C0)
%x_rorl1 = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
call void @use.i8(i8 %x_rorl1)
%r = icmp ne i8 %x_rorl0, %x_rorl1
ret i1 %r
}

; Positive test with one extra use: %x rotated left by the constant 6 is
; compared ('icmp eq') with %y rotated left by the constant 3, and the %y
; rotate is also passed to @use.i8. The expected output keeps the %y rotate
; for that call, rotates %x left by 3 (the difference 6 - 3), and compares
; the result against the unrotated %y.
define i1 @cmpeq_rorlXC_rorlYC_multiuse1(i8 %x, i8 %y) {
; CHECK-LABEL: @cmpeq_rorlXC_rorlYC_multiuse1(
; CHECK-NEXT: [[Y_RORL1:%.*]] = call i8 @llvm.fshl.i8(i8 [[Y:%.*]], i8 [[Y]], i8 3)
; CHECK-NEXT: call void @use.i8(i8 [[Y_RORL1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 3)
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP1]], [[Y]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorl0 = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 6)
%y_rorl1 = call i8 @llvm.fshl.i8(i8 %y, i8 %y, i8 3)
call void @use.i8(i8 %y_rorl1)
%r = icmp eq i8 %x_rorl0, %y_rorl1
ret i1 %r
}

; Negative test: %x rotated left by 6 is compared ('icmp eq') with %x
; rotated left by 3, and both rotate results are additionally passed to
; @use.i8. The expected output keeps both rotates, both calls, and the
; compare unchanged.
define i1 @cmpeq_rorlC_rorlC_multiuse2_fail(i8 %x) {
; CHECK-LABEL: @cmpeq_rorlC_rorlC_multiuse2_fail(
; CHECK-NEXT: [[X_RORL0:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 6)
; CHECK-NEXT: [[X_RORL1:%.*]] = call i8 @llvm.fshl.i8(i8 [[X]], i8 [[X]], i8 3)
; CHECK-NEXT: call void @use.i8(i8 [[X_RORL0]])
; CHECK-NEXT: call void @use.i8(i8 [[X_RORL1]])
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[X_RORL0]], [[X_RORL1]]
; CHECK-NEXT: ret i1 [[R]]
;
%x_rorl0 = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 6)
%x_rorl1 = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
call void @use.i8(i8 %x_rorl0)
call void @use.i8(i8 %x_rorl1)
%r = icmp eq i8 %x_rorl0, %x_rorl1
ret i1 %r
}
6 changes: 3 additions & 3 deletions llvm/test/Transforms/InstCombine/icmp-rotate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,9 @@ define i1 @wrong_pred(i8 %x, i8 %y, i8 %z) {
; rotate amounts mismatch - the two rotates are combined into one rotate by the difference of the amounts
define i1 @amounts_mismatch(i8 %x, i8 %y, i8 %z, i8 %w) {
; CHECK-LABEL: @amounts_mismatch(
; CHECK-NEXT: [[F:%.*]] = tail call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[Z:%.*]])
; CHECK-NEXT: [[F2:%.*]] = tail call i8 @llvm.fshl.i8(i8 [[Y:%.*]], i8 [[Y]], i8 [[W:%.*]])
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[F]], [[F2]]
; CHECK-NEXT: [[TMP1:%.*]] = sub i8 [[Z:%.*]], [[W:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.fshl.i8(i8 [[X:%.*]], i8 [[X]], i8 [[TMP1]])
; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[R]]
;
%f = tail call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %z)
Expand Down