; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ssse3 | FileCheck %s -check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s -check-prefix=AVX2

; Direct intrinsic call: lowers to a single phaddsw/vphaddsw.
define <8 x i16> @phaddsw_v8i16_intrinsic(<8 x i16> %a, <8 x i16> %b) {
; SSSE3-LABEL: phaddsw_v8i16_intrinsic:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    phaddsw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX2-LABEL: phaddsw_v8i16_intrinsic:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
entry:
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %res
}
; Generic pattern: even/odd shuffles + sadd.sat should be matched to phaddsw.
define <8 x i16> @phaddsw_v8i16_generic(<8 x i16> %a, <8 x i16> %b) {
; SSSE3-LABEL: phaddsw_v8i16_generic:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    phaddsw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX2-LABEL: phaddsw_v8i16_generic:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
entry:
  %even = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %odd = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %sum = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %even, <8 x i16> %odd)
  ret <8 x i16> %sum
}
; 256-bit generic pattern: split into two xmm phaddsw on SSSE3; single ymm
; vphaddsw + vpermq lane fixup on AVX2 (vphaddsw operates per 128-bit lane).
define <16 x i16> @phaddsw_v16i16_generic(<16 x i16> %a, <16 x i16> %b) {
; SSSE3-LABEL: phaddsw_v16i16_generic:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    phaddsw %xmm1, %xmm0
; SSSE3-NEXT:    phaddsw %xmm3, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX2-LABEL: phaddsw_v16i16_generic:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
entry:
  %even = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %odd = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %sum = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %even, <16 x i16> %odd)
  ret <16 x i16> %sum
}
; Direct intrinsic call: lowers to a single phsubsw/vphsubsw.
define <8 x i16> @phsubsw_v8i16_intrinsic(<8 x i16> %a, <8 x i16> %b) {
; SSSE3-LABEL: phsubsw_v8i16_intrinsic:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    phsubsw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX2-LABEL: phsubsw_v8i16_intrinsic:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
entry:
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %res
}
; Generic pattern: even/odd shuffles + ssub.sat should be matched to phsubsw.
define <8 x i16> @phsubsw_v8i16_generic(<8 x i16> %a, <8 x i16> %b) {
; SSSE3-LABEL: phsubsw_v8i16_generic:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    phsubsw %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; AVX2-LABEL: phsubsw_v8i16_generic:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
entry:
  %even = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %odd = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %diff = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %even, <8 x i16> %odd)
  ret <8 x i16> %diff
}
; 256-bit generic pattern: split into two xmm phsubsw on SSSE3; single ymm
; vphsubsw + vpermq lane fixup on AVX2 (vphsubsw operates per 128-bit lane).
define <16 x i16> @phsubsw_v16i16_generic(<16 x i16> %a, <16 x i16> %b) {
; SSSE3-LABEL: phsubsw_v16i16_generic:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    phsubsw %xmm1, %xmm0
; SSSE3-NEXT:    phsubsw %xmm3, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; AVX2-LABEL: phsubsw_v16i16_generic:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
entry:
  %even = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
  %odd = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  %diff = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %even, <16 x i16> %odd)
  ret <16 x i16> %diff
}