192 changes: 66 additions & 126 deletions llvm/test/CodeGen/X86/vec_shift5.ll
@@ -1,100 +1,70 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
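
The mechanical change running through this patch: the legacy X32 check prefix is renamed to X86 (X32 is easily confused with the x32 ABI, which is presumably the motivation), and wherever the 32-bit and 64-bit bodies are identical apart from the return instruction, update_llc_test_checks.py merges them under a shared CHECK prefix. A minimal sketch of how the merged prefixes behave:

; Each CHECK line must hold for BOTH RUN lines above, since both pass
; --check-prefixes lists that include CHECK.
; CHECK: movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; The return mnemonic is the only difference between the two targets,
; so one regex covers retl (i686) and retq (x86_64):
; CHECK-NEXT: ret{{[l|q]}}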

; Verify that we correctly fold target specific packed vector shifts by
; immediate count into a simple build_vector when the elements of the vector
; in input to the packed shift are all constants or undef.
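
Concretely, the fold turns the intrinsic call into a constant that codegen can materialize with a single movaps. A sketch using test1's own values:

; Every lane of the shifted operand is a constant...
%r = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
; ...so each lane is shifted at compile time (1 << 3 = 8, 2 << 3 = 16,
; 4 << 3 = 32, 8 << 3 = 64) and the call collapses to the build_vector
; <8, 16, 32, 64, 8, 16, 32, 64>; no shift instruction is emitted.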

define <8 x i16> @test1() {
; X32-LABEL: test1:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; X64-NEXT: retq
; CHECK-LABEL: test1:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64,8,16,32,64]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 2, i16 4, i16 8>, i32 3)
ret <8 x i16> %1
}

define <8 x i16> @test2() {
; X32-LABEL: test2:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test2:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
ret <8 x i16> %1
}

define <8 x i16> @test3() {
; X32-LABEL: test3:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test3:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test3:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4,0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i16 8, i16 16, i16 32>, i32 3)
ret <8 x i16> %1
}

define <4 x i32> @test4() {
; X32-LABEL: test4:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64]
; X32-NEXT: retl
;
; X64-LABEL: test4:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64]
; X64-NEXT: retq
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [8,16,32,64]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
ret <4 x i32> %1
}

define <4 x i32> @test5() {
; X32-LABEL: test5:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test5:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test5:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
ret <4 x i32> %1
}

define <4 x i32> @test6() {
; X32-LABEL: test6:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X32-NEXT: retl
;
; X64-LABEL: test6:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; X64-NEXT: retq
; CHECK-LABEL: test6:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,2,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
ret <4 x i32> %1
}

define <2 x i64> @test7() {
; X32-LABEL: test7:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0]
; X32-NEXT: retl
; X86-LABEL: test7:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0]
; X86-NEXT: retl
;
; X64-LABEL: test7:
; X64: # %bb.0:
@@ -105,10 +75,10 @@ define <2 x i64> @test7() {
}

define <2 x i64> @test8() {
; X32-LABEL: test8:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,0,2,0]
; X32-NEXT: retl
; X86-LABEL: test8:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [1,0,2,0]
; X86-NEXT: retl
;
; X64-LABEL: test8:
; X64: # %bb.0:
@@ -119,38 +89,28 @@ define <2 x i64> @test8() {
}

define <8 x i16> @test9() {
; X32-LABEL: test9:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X32-NEXT: retl
;
; X64-LABEL: test9:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X64-NEXT: retq
; CHECK-LABEL: test9:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
ret <8 x i16> %1
}

define <4 x i32> @test10() {
; X32-LABEL: test10:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X32-NEXT: retl
;
; X64-LABEL: test10:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X64-NEXT: retq
; CHECK-LABEL: test10:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
ret <4 x i32> %1
}

define <2 x i64> @test11() {
; X32-LABEL: test11:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,0,3,0]
; X32-NEXT: retl
; X86-LABEL: test11:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [0,0,3,0]
; X86-NEXT: retl
;
; X64-LABEL: test11:
; X64: # %bb.0:
@@ -161,66 +121,46 @@ define <2 x i64> @test11() {
}

define <8 x i16> @test12() {
; X32-LABEL: test12:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X32-NEXT: retl
;
; X64-LABEL: test12:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X64-NEXT: retq
; CHECK-LABEL: test12:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
ret <8 x i16> %1
}

define <4 x i32> @test13() {
; X32-LABEL: test13:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X32-NEXT: retl
;
; X64-LABEL: test13:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; X64-NEXT: retq
; CHECK-LABEL: test13:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
ret <4 x i32> %1
}

define <8 x i16> @test14() {
; X32-LABEL: test14:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X32-NEXT: retl
;
; X64-LABEL: test14:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; X64-NEXT: retq
; CHECK-LABEL: test14:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
ret <8 x i16> %1
}

define <4 x i32> @test15() {
; X32-LABEL: test15:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
; X32-NEXT: retl
;
; X64-LABEL: test15:
; X64: # %bb.0:
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
; X64-NEXT: retq
; CHECK-LABEL: test15:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
; CHECK-NEXT: ret{{[l|q]}}
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
ret <4 x i32> %1
}

define <2 x i64> @test16() {
; X32-LABEL: test16:
; X32: # %bb.0:
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,0,248,0]
; X32-NEXT: retl
; X86-LABEL: test16:
; X86: # %bb.0:
; X86-NEXT: movaps {{.*#+}} xmm0 = [0,0,248,0]
; X86-NEXT: retl
;
; X64-LABEL: test16:
; X64: # %bb.0:
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/vec_shift6.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX,AVX512

; Verify that we don't scalarize a packed vector shift left of 16-bit
; signed integers if the amount is a constant build_vector.
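
The test bodies are collapsed in this hunk; the shape being protected is a shl of <8 x i16> by a non-uniform constant vector, which SSE2 can lower as one pmullw by per-lane powers of two instead of eight scalar shifts. A sketch (the exact lane counts in the file are an assumption):

define <8 x i16> @shl_nonuniform(<8 x i16> %a) {
  %shl = shl <8 x i16> %a, <i16 1, i16 1, i16 2, i16 3, i16 7, i16 0, i16 9, i16 11>
  ; Lowered as pmullw %a by <2, 2, 4, 8, 128, 1, 512, 2048>,
  ; i.e. a per-lane multiply by (1 << amount).
  ret <8 x i16> %shl
}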
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/X86/vec_shift7.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; Verify that we don't fail when shift by zero is encountered.
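
The IR body of test1 is collapsed in this hunk; the point is that when the extracted lane's shift count is zero, the shift folds away entirely, which is why the X86 output is just movd/pshufd/movd (returning the low i64 lane in eax:edx) with no shift instruction. A sketch of the shape, with the nonzero lane count an assumption:

define i64 @sketch(<2 x i64> %a) {
entry:
  %c = shl <2 x i64> %a, <i64 0, i64 2>    ; lane 0 is shifted by zero
  %d = extractelement <2 x i64> %c, i32 0  ; and only lane 0 is used
  ret i64 %d
}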

define i64 @test1(<2 x i64> %a) {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movd %xmm0, %eax
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X32-NEXT: movd %xmm0, %edx
; X32-NEXT: retl
; X86-LABEL: test1:
; X86: # %bb.0: # %entry
; X86-NEXT: movd %xmm0, %eax
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-NEXT: movd %xmm0, %edx
; X86-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
84 changes: 42 additions & 42 deletions llvm/test/CodeGen/X86/vshift-1.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.
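
For example, in shift1a below the amount is the same constant in every lane, so the generic IR shift maps onto one immediate-form packed shift. The body is collapsed in this hunk; the constant is inferred from the checked psllq $32:

%shl = shl <2 x i64> %val, <i64 32, i64 32>   ; -> psllq $32, %xmm0
store <2 x i64> %shl, <2 x i64>* %dst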

define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psllq $32, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psllq $32, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -25,13 +25,13 @@ entry:
}

define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: psllq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1b:
; X64: # %bb.0: # %entry
@@ -49,12 +49,12 @@ entry:


define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pslld $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pslld $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -68,13 +68,13 @@ entry:
}

define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -93,12 +93,12 @@ entry:
}

define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psllw $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psllw $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -113,14 +113,14 @@ entry:

; Make sure the shift amount is properly zero extended.
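
Why the extension matters: the register forms of the packed shifts read the count from the low 64 bits of an XMM register, and an out-of-range count (>= 16 for psllw) zeroes every lane, so the i16 amount must reach the register with clean upper bits. The expected steps, mirroring the checks below (stack offsets illustrative):

movzwl 8(%esp), %ecx    # zero-extend the i16 count; no garbage above bit 15
movd   %ecx, %xmm1      # count now sits in the low 64 bits of xmm1
psllw  %xmm1, %xmm0     # shift all eight lanes by the clean count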
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psllw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry
84 changes: 42 additions & 42 deletions llvm/test/CodeGen/X86/vshift-2.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.

define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -25,13 +25,13 @@ entry:
}

define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind {
; X32-LABEL: shift1b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X32-NEXT: psrlq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT: psrlq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1b:
; X64: # %bb.0: # %entry
@@ -48,12 +48,12 @@ entry:
}

define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrld $17, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrld $17, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -67,13 +67,13 @@ entry:
}

define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -93,12 +93,12 @@ entry:


define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlw $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlw $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -113,14 +113,14 @@ entry:

; properly zero extend the shift amount
define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psrlw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psrlw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/X86/vshift-3.ll
@@ -1,22 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.

; Note that x86 does have ashr
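
shift1a below is the case that comment is contrasting against: SSE2 has psraw and psrad but no 64-bit arithmetic shift (psraq only arrives with AVX-512), so an ashr of <2 x i64> is assembled from psrad $31 plus shuffles, as the checks show. The collapsed IR is presumably:

%ashr = ashr <2 x i64> %val, <i64 32, i64 32>
; per lane: the low 32 result bits are the source's high 32 bits, and the
; high 32 result bits are the sign word produced by psrad $31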

define void @shift1a(<2 x i64> %val, <2 x i64>* %dst) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X32-NEXT: psrad $31, %xmm0
; X32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movdqa %xmm1, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,3,2,3]
; X86-NEXT: psrad $31, %xmm0
; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT: movdqa %xmm1, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -33,12 +33,12 @@ entry:
}

define void @shift2a(<4 x i32> %val, <4 x i32>* %dst) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrad $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrad $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -52,13 +52,13 @@ entry:
}

define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrad %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrad %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -77,12 +77,12 @@ entry:
}

define void @shift3a(<8 x i16> %val, <8 x i16>* %dst) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psraw $5, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psraw $5, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -96,14 +96,14 @@ entry:
}

define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psraw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psraw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry
110 changes: 55 additions & 55 deletions llvm/test/CodeGen/X86/vshift-4.ll
@@ -1,17 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same when using a shuffle splat.

define void @shift1a(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
; X32-LABEL: shift1a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psllq %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1a:
; X64: # %bb.0: # %entry
@@ -27,16 +27,16 @@ entry:

; shift1b can't use a packed shift but can shift lanes separately and shuffle back together
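
Background for the checked sequence: the register form of psllq shifts both 64-bit lanes by the single count in the low qword of its source, so distinct per-lane counts take two shifts plus a merge. A sketch of the mapping:

%shl = shl <2 x i64> %val, %sh   ; %sh holds two different lane counts
; -> psllq %xmm1, %xmm2          ; both lanes shifted by count[0]
;    pshufd + psllq              ; both lanes shifted by count[1]
;    movsd                       ; lane 0 from the first result, lane 1 from the second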
define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, <2 x i64> %sh) nounwind {
; X32-LABEL: shift1b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movdqa %xmm0, %xmm2
; X32-NEXT: psllq %xmm1, %xmm2
; X32-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X32-NEXT: psllq %xmm1, %xmm0
; X32-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X32-NEXT: movapd %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift1b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movdqa %xmm0, %xmm2
; X86-NEXT: psllq %xmm1, %xmm2
; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-NEXT: psllq %xmm1, %xmm0
; X86-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; X86-NEXT: movapd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift1b:
; X64: # %bb.0: # %entry
@@ -55,13 +55,13 @@ entry:
}

define void @shift2a(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
; X32-LABEL: shift2a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2a:
; X64: # %bb.0: # %entry
@@ -77,13 +77,13 @@ entry:
}

define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
; X32-LABEL: shift2b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2b:
; X64: # %bb.0: # %entry
@@ -99,13 +99,13 @@ entry:
}

define void @shift2c(<4 x i32> %val, <4 x i32>* %dst, <2 x i32> %amt) nounwind {
; X32-LABEL: shift2c:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: psrlq $32, %xmm1
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift2c:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: psrlq $32, %xmm1
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift2c:
; X64: # %bb.0: # %entry
@@ -121,14 +121,14 @@ entry:
}

define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
; X32-LABEL: shift3a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,6,6]
; X32-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,6,6,6,6]
; X86-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; X86-NEXT: psllw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3a:
; X64: # %bb.0: # %entry
@@ -145,14 +145,14 @@ entry:
}

define void @shift3b(<8 x i16> %val, <8 x i16>* %dst, i16 %amt) nounwind {
; X32-LABEL: shift3b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd %ecx, %xmm1
; X32-NEXT: psllw %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift3b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd %ecx, %xmm1
; X86-NEXT: psllw %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift3b:
; X64: # %bb.0: # %entry
64 changes: 32 additions & 32 deletions llvm/test/CodeGen/X86/vshift-5.ll
@@ -1,18 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; When loading the shift amount from memory, avoid generating the splat.
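
The IR builds an explicit splat of the loaded amount, but the splat is dead weight for codegen: the packed-shift count is scalar (the low bits of an XMM register), so a single movd can feed the shift directly. A sketch of the shift5a shape, with the body reconstructed and therefore an assumption:

define void @shift5a_sketch(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
entry:
  %amt = load i32, i32* %pamt
  %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
  %splat = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer
  %shl = shl <4 x i32> %val, %splat   ; -> movd + pslld; no pshufd splat emitted
  store <4 x i32> %shl, <4 x i32>* %dst
  ret void
}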

define void @shift5a(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
; X32-LABEL: shift5a:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5a:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5a:
; X64: # %bb.0: # %entry
@@ -31,14 +31,14 @@ entry:


define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind {
; X32-LABEL: shift5b:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrad %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5b:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrad %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5b:
; X64: # %bb.0: # %entry
@@ -57,13 +57,13 @@ entry:


define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift5c:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: pslld %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5c:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: pslld %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5c:
; X64: # %bb.0: # %entry
@@ -81,13 +81,13 @@ entry:


define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind {
; X32-LABEL: shift5d:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: psrad %xmm1, %xmm0
; X32-NEXT: movdqa %xmm0, (%eax)
; X32-NEXT: retl
; X86-LABEL: shift5d:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: psrad %xmm1, %xmm0
; X86-NEXT: movdqa %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shift5d:
; X64: # %bb.0: # %entry
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/X86/vshift-6.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64

; This test makes sure that the compiler does not crash with an
; assertion failure when trying to fold a vector shift left
@@ -25,42 +25,42 @@
; 'count' is the vector shift count.
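
Context for the long ladder checked below: SSE2 has no per-byte shift, so a variable shl of <16 x i8> is legalized as a conditional-shift ladder. psllw $5 moves the three meaningful count bits of each byte up to its sign bit; then for steps 4, 2 and 1, pcmpgtb broadcasts the current top bit into a byte mask, a pand/pandn/por selection keeps either the shifted or the unshifted value, and paddb exposes the next count bit. As a sketch, assuming the generic shift being legalized is:

%shl = shl <16 x i8> %v, %count
; psllw $5, %counts                 ; count bits 2..0 -> byte bits 7..5
; repeat for step in {4, 2, 1}:
;   mask = pcmpgtb(0, counts)       ; bytes whose current top bit is set
;   v    = select(mask, v << step, v)   ; pand/pandn/por
;   counts = paddb(counts, counts)  ; move the next count bit to the top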

define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
; X32-LABEL: do_not_crash:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movb %al, (%ecx)
; X32-NEXT: movd %eax, %xmm0
; X32-NEXT: psllq $56, %xmm0
; X32-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
; X32-NEXT: movdqa %xmm2, %xmm1
; X32-NEXT: pandn %xmm0, %xmm1
; X32-NEXT: por %xmm2, %xmm1
; X32-NEXT: pcmpeqd %xmm2, %xmm2
; X32-NEXT: psllw $5, %xmm1
; X32-NEXT: pxor %xmm3, %xmm3
; X32-NEXT: pxor %xmm0, %xmm0
; X32-NEXT: pcmpgtb %xmm1, %xmm0
; X32-NEXT: pxor %xmm0, %xmm2
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: por %xmm2, %xmm0
; X32-NEXT: paddb %xmm1, %xmm1
; X32-NEXT: pxor %xmm2, %xmm2
; X32-NEXT: pcmpgtb %xmm1, %xmm2
; X32-NEXT: movdqa %xmm2, %xmm4
; X32-NEXT: pandn %xmm0, %xmm4
; X32-NEXT: psllw $2, %xmm0
; X32-NEXT: pand %xmm2, %xmm0
; X32-NEXT: pand {{\.LCPI.*}}, %xmm0
; X32-NEXT: por %xmm4, %xmm0
; X32-NEXT: paddb %xmm1, %xmm1
; X32-NEXT: pcmpgtb %xmm1, %xmm3
; X32-NEXT: movdqa %xmm3, %xmm1
; X32-NEXT: pandn %xmm0, %xmm1
; X32-NEXT: paddb %xmm0, %xmm0
; X32-NEXT: pand %xmm3, %xmm0
; X32-NEXT: por %xmm1, %xmm0
; X32-NEXT: retl
; X86-LABEL: do_not_crash:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movb %al, (%ecx)
; X86-NEXT: movd %eax, %xmm0
; X86-NEXT: psllq $56, %xmm0
; X86-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
; X86-NEXT: movdqa %xmm2, %xmm1
; X86-NEXT: pandn %xmm0, %xmm1
; X86-NEXT: por %xmm2, %xmm1
; X86-NEXT: pcmpeqd %xmm2, %xmm2
; X86-NEXT: psllw $5, %xmm1
; X86-NEXT: pxor %xmm3, %xmm3
; X86-NEXT: pxor %xmm0, %xmm0
; X86-NEXT: pcmpgtb %xmm1, %xmm0
; X86-NEXT: pxor %xmm0, %xmm2
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: por %xmm2, %xmm0
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pxor %xmm2, %xmm2
; X86-NEXT: pcmpgtb %xmm1, %xmm2
; X86-NEXT: movdqa %xmm2, %xmm4
; X86-NEXT: pandn %xmm0, %xmm4
; X86-NEXT: psllw $2, %xmm0
; X86-NEXT: pand %xmm2, %xmm0
; X86-NEXT: pand {{\.LCPI.*}}, %xmm0
; X86-NEXT: por %xmm4, %xmm0
; X86-NEXT: paddb %xmm1, %xmm1
; X86-NEXT: pcmpgtb %xmm1, %xmm3
; X86-NEXT: movdqa %xmm3, %xmm1
; X86-NEXT: pandn %xmm0, %xmm1
; X86-NEXT: paddb %xmm0, %xmm0
; X86-NEXT: pand %xmm3, %xmm0
; X86-NEXT: por %xmm1, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: do_not_crash:
; X64: # %bb.0: # %entry
10 changes: 9 additions & 1 deletion llvm/test/CodeGen/X86/vshift_split.ll
@@ -1,7 +1,15 @@
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s

; Example that requires splitting and expanding a vector shift.
define <2 x i64> @update(<2 x i64> %val) nounwind readnone {
; CHECK-LABEL: update:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlq $2, %xmm1
; CHECK-NEXT: psrlq $3, %xmm0
; CHECK-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; CHECK-NEXT: retl
entry:
%shr = lshr <2 x i64> %val, < i64 2, i64 3 >
ret <2 x i64> %shr
11 changes: 10 additions & 1 deletion llvm/test/CodeGen/X86/vshift_split2.ll
@@ -1,8 +1,17 @@
; RUN: llc < %s -mtriple=i686-- -mcpu=yonah
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mcpu=yonah | FileCheck %s

; Legalization example that requires splitting a large vector into smaller pieces.

define void @update(<8 x i32> %val, <8 x i32>* %dst) nounwind {
; CHECK-LABEL: update:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: psrad $2, %xmm0
; CHECK-NEXT: psrad $4, %xmm1
; CHECK-NEXT: movdqa %xmm1, 16(%eax)
; CHECK-NEXT: movdqa %xmm0, (%eax)
; CHECK-NEXT: retl
entry:
%shl = shl <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 >
%shr = ashr <8 x i32> %val, < i32 2, i32 2, i32 2, i32 2, i32 4, i32 4, i32 4, i32 4 >
331 changes: 114 additions & 217 deletions llvm/test/CodeGen/X86/x86-shifts.ll

Large diffs are not rendered by default.

14 changes: 1 addition & 13 deletions llvm/test/Transforms/InstCombine/bswap.ll
@@ -596,19 +596,7 @@ define i64 @bswap_and_mask_2(i64 %0) {

define i64 @bswap_trunc(i64 %x01234567) {
; CHECK-LABEL: @bswap_trunc(
; CHECK-NEXT: [[X7ZZZZZZZ:%.*]] = shl i64 [[X01234567:%.*]], 56
; CHECK-NEXT: [[XZ0123456:%.*]] = lshr i64 [[X01234567]], 8
; CHECK-NEXT: [[XZZZZZ012:%.*]] = lshr i64 [[X01234567]], 40
; CHECK-NEXT: [[X3456:%.*]] = trunc i64 [[XZ0123456]] to i32
; CHECK-NEXT: [[XZ012:%.*]] = trunc i64 [[XZZZZZ012]] to i32
; CHECK-NEXT: [[X6543:%.*]] = call i32 @llvm.bswap.i32(i32 [[X3456]])
; CHECK-NEXT: [[X210Z:%.*]] = call i32 @llvm.bswap.i32(i32 [[XZ012]])
; CHECK-NEXT: [[XZ210:%.*]] = lshr exact i32 [[X210Z]], 8
; CHECK-NEXT: [[XZZZZ6543:%.*]] = zext i32 [[X6543]] to i64
; CHECK-NEXT: [[XZZZZZ210:%.*]] = zext i32 [[XZ210]] to i64
; CHECK-NEXT: [[XZ6543ZZZ:%.*]] = shl nuw nsw i64 [[XZZZZ6543]], 24
; CHECK-NEXT: [[XZ6543210:%.*]] = or i64 [[XZ6543ZZZ]], [[XZZZZZ210]]
; CHECK-NEXT: [[X76543210:%.*]] = or i64 [[XZ6543210]], [[X7ZZZZZZZ]]
; CHECK-NEXT: [[X76543210:%.*]] = call i64 @llvm.bswap.i64(i64 [[X01234567:%.*]])
; CHECK-NEXT: ret i64 [[X76543210]]
;
%x7zzzzzzz = shl i64 %x01234567, 56
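
What this hunk records: InstCombine's bswap matcher now recognizes the whole shl/lshr/trunc/bswap.i32/zext/or chain (the body continues past what is shown) as a byte reversal of %x01234567, so the thirteen checked instructions collapse to the single call that the new CHECK line verifies:

%x76543210 = call i64 @llvm.bswap.i64(i64 %x01234567)
ret i64 %x76543210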