diff --git a/llvm/test/CodeGen/X86/avgceils.ll b/llvm/test/CodeGen/X86/avgceils.ll index 4529ea275df9c..f44f98c2a41ab 100644 --- a/llvm/test/CodeGen/X86/avgceils.ll +++ b/llvm/test/CodeGen/X86/avgceils.ll @@ -9,7 +9,7 @@ ; 128-bit vectors ; -define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) { +define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -65,7 +65,7 @@ define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res } -define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) { +define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; SSE2-LABEL: test_ext_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] @@ -165,7 +165,7 @@ define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res } -define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) { +define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -190,7 +190,7 @@ define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } -define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) { +define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] @@ -289,7 +289,7 @@ define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } -define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) { +define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -314,7 +314,7 @@ define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) { ret <4 x i32> %res } -define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) { +define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] @@ -410,7 +410,7 @@ define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) { ret <4 x i32> %res } -define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) { +define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE2-LABEL: test_fixed_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm2 @@ -472,7 +472,7 @@ define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) { ret <2 x i64> %res } -define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) { +define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movq %xmm0, %rax @@ -574,7 +574,7 @@ define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) { ; 256-bit vectors ; -define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) { +define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v32i8: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -649,7 +649,7 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) { ret <32 x i8> %res } -define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) { +define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, 
<32 x i8> %a1) nounwind { ; SSE2-LABEL: test_ext_v32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] @@ -806,7 +806,7 @@ define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) { ret <32 x i8> %res } -define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) { +define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i16: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -858,7 +858,7 @@ define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) { ret <16 x i16> %res } -define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) { +define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; SSE2-LABEL: test_ext_v16i16: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] @@ -1014,7 +1014,7 @@ define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) { ret <16 x i16> %res } -define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) { +define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v8i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -1066,7 +1066,7 @@ define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) { ret <8 x i32> %res } -define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) { +define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[2,3,2,3] @@ -1218,7 +1218,7 @@ define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) { ret <8 x i32> %res } -define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) { +define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; SSE2-LABEL: test_fixed_v4i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm1, %xmm4 @@ -1306,27 +1306,15 @@ define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ret <4 x i64> %res } -define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { +define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v4i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: pushq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: pushq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: pushq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: pushq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 56 -; SSE2-NEXT: .cfi_offset %rbx, -56 -; SSE2-NEXT: .cfi_offset %r12, -48 -; SSE2-NEXT: .cfi_offset %r13, -40 -; SSE2-NEXT: .cfi_offset %r14, -32 -; SSE2-NEXT: .cfi_offset %r15, -24 -; SSE2-NEXT: .cfi_offset %rbp, -16 ; SSE2-NEXT: movq %xmm0, %r11 ; SSE2-NEXT: movq %r11, %r12 ; SSE2-NEXT: sarq $63, %r12 @@ -1382,39 +1370,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE2-NEXT: popq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: popq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: popq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: popq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: popq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: popq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 8 ; 
SSE2-NEXT: retq ; ; SSE4-LABEL: test_ext_v4i64: ; SSE4: # %bb.0: ; SSE4-NEXT: pushq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: pushq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: pushq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: pushq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: pushq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: pushq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 56 -; SSE4-NEXT: .cfi_offset %rbx, -56 -; SSE4-NEXT: .cfi_offset %r12, -48 -; SSE4-NEXT: .cfi_offset %r13, -40 -; SSE4-NEXT: .cfi_offset %r14, -32 -; SSE4-NEXT: .cfi_offset %r15, -24 -; SSE4-NEXT: .cfi_offset %rbp, -16 ; SSE4-NEXT: pextrq $1, %xmm0, %r11 ; SSE4-NEXT: movq %r11, %r12 ; SSE4-NEXT: sarq $63, %r12 @@ -1466,39 +1436,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE4-NEXT: popq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: popq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: popq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: popq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: popq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: popq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 8 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test_ext_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: pushq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: pushq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: pushq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: pushq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 56 -; AVX1-NEXT: .cfi_offset %rbx, -56 -; AVX1-NEXT: .cfi_offset %r12, -48 -; AVX1-NEXT: .cfi_offset %r13, -40 -; AVX1-NEXT: .cfi_offset %r14, -32 -; AVX1-NEXT: .cfi_offset %r15, -24 -; AVX1-NEXT: .cfi_offset %rbp, -16 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpextrq $1, %xmm2, %r11 ; AVX1-NEXT: movq %r11, %r12 @@ -1553,39 +1505,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: popq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: popq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: popq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: popq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: popq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: popq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_ext_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: pushq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: pushq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: pushq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: pushq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 56 -; AVX2-NEXT: .cfi_offset %rbx, -56 -; AVX2-NEXT: .cfi_offset %r12, -48 -; AVX2-NEXT: .cfi_offset %r13, -40 -; AVX2-NEXT: .cfi_offset %r14, -32 -; AVX2-NEXT: .cfi_offset %r15, -24 -; AVX2-NEXT: .cfi_offset %rbp, -16 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 ; AVX2-NEXT: vpextrq $1, %xmm2, %r11 ; AVX2-NEXT: movq %r11, %r12 @@ -1640,39 +1574,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; AVX2-NEXT: vinserti128 $1, %xmm0, 
%ymm1, %ymm0 ; AVX2-NEXT: popq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: popq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: popq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: popq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: popq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: popq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 8 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_ext_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: pushq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: pushq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: pushq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 32 ; AVX512-NEXT: pushq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: pushq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 56 -; AVX512-NEXT: .cfi_offset %rbx, -56 -; AVX512-NEXT: .cfi_offset %r12, -48 -; AVX512-NEXT: .cfi_offset %r13, -40 -; AVX512-NEXT: .cfi_offset %r14, -32 -; AVX512-NEXT: .cfi_offset %r15, -24 -; AVX512-NEXT: .cfi_offset %rbp, -16 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2 ; AVX512-NEXT: vpextrq $1, %xmm2, %r11 ; AVX512-NEXT: movq %r11, %r12 @@ -1727,17 +1643,11 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX512-NEXT: popq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: popq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: popq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 32 ; AVX512-NEXT: popq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: popq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: popq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 8 ; AVX512-NEXT: retq %x0 = sext <4 x i64> %a0 to <4 x i128> %x1 = sext <4 x i64> %a1 to <4 x i128> @@ -1752,7 +1662,7 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; 512-bit vectors ; -define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) { +define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v64i8: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm11 @@ -1864,7 +1774,7 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ret <64 x i8> %res } -define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) { +define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE2-LABEL: test_ext_v64i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm3, %xmm8 @@ -2144,7 +2054,7 @@ define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ret <64 x i8> %res } -define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) { +define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v32i16: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm8 @@ -2220,7 +2130,7 @@ define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) { ret <32 x i16> %res } -define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) { +define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE2-LABEL: test_ext_v32i16: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklwd {{.*#+}} xmm13 = xmm13[0],xmm3[0],xmm13[1],xmm3[1],xmm13[2],xmm3[2],xmm13[3],xmm3[3] @@ -2498,7 +2408,7 @@ define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) { ret <32 x i16> %res } -define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) { +define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; 
SSE-LABEL: test_fixed_v16i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm8 @@ -2574,7 +2484,7 @@ define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) { ret <16 x i32> %res } -define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) { +define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v16i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm0[2,3,2,3] @@ -2848,7 +2758,7 @@ define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) { ret <16 x i32> %res } -define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) { +define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; SSE2-LABEL: test_fixed_v8i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm3, %xmm8 @@ -2985,29 +2895,16 @@ define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ret <8 x i64> %res } -define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { +define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: pushq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: pushq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: pushq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: pushq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 56 ; SSE2-NEXT: pushq %rax -; SSE2-NEXT: .cfi_def_cfa_offset 64 -; SSE2-NEXT: .cfi_offset %rbx, -56 -; SSE2-NEXT: .cfi_offset %r12, -48 -; SSE2-NEXT: .cfi_offset %r13, -40 -; SSE2-NEXT: .cfi_offset %r14, -32 -; SSE2-NEXT: .cfi_offset %r15, -24 -; SSE2-NEXT: .cfi_offset %rbp, -16 ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSE2-NEXT: sarq $63, %rax @@ -3137,43 +3034,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] ; SSE2-NEXT: addq $8, %rsp -; SSE2-NEXT: .cfi_def_cfa_offset 56 ; SSE2-NEXT: popq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: popq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: popq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: popq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: popq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: popq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 8 ; SSE2-NEXT: retq ; ; SSE4-LABEL: test_ext_v8i64: ; SSE4: # %bb.0: ; SSE4-NEXT: pushq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: pushq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: pushq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: pushq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: pushq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: pushq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 56 ; SSE4-NEXT: subq $16, %rsp -; SSE4-NEXT: .cfi_def_cfa_offset 72 -; SSE4-NEXT: .cfi_offset %rbx, -56 -; SSE4-NEXT: .cfi_offset %r12, -48 -; SSE4-NEXT: .cfi_offset %r13, -40 -; SSE4-NEXT: .cfi_offset %r14, -32 -; SSE4-NEXT: .cfi_offset %r15, -24 -; SSE4-NEXT: .cfi_offset %rbp, -16 ; SSE4-NEXT: pextrq $1, %xmm0, %rax ; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSE4-NEXT: sarq $63, %rax @@ -3301,43 +3178,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0] ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] ; SSE4-NEXT: addq $16, %rsp -; SSE4-NEXT: 
.cfi_def_cfa_offset 56 ; SSE4-NEXT: popq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: popq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: popq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: popq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: popq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: popq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 8 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test_ext_v8i64: ; AVX1: # %bb.0: ; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: pushq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: pushq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: pushq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: pushq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 56 ; AVX1-NEXT: pushq %rax -; AVX1-NEXT: .cfi_def_cfa_offset 64 -; AVX1-NEXT: .cfi_offset %rbx, -56 -; AVX1-NEXT: .cfi_offset %r12, -48 -; AVX1-NEXT: .cfi_offset %r13, -40 -; AVX1-NEXT: .cfi_offset %r14, -32 -; AVX1-NEXT: .cfi_offset %r15, -24 -; AVX1-NEXT: .cfi_offset %rbp, -16 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vpextrq $1, %xmm4, %rax ; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -3465,43 +3322,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: addq $8, %rsp -; AVX1-NEXT: .cfi_def_cfa_offset 56 ; AVX1-NEXT: popq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: popq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: popq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: popq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: popq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: popq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_ext_v8i64: ; AVX2: # %bb.0: ; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: pushq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: pushq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: pushq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: pushq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 56 ; AVX2-NEXT: pushq %rax -; AVX2-NEXT: .cfi_def_cfa_offset 64 -; AVX2-NEXT: .cfi_offset %rbx, -56 -; AVX2-NEXT: .cfi_offset %r12, -48 -; AVX2-NEXT: .cfi_offset %r13, -40 -; AVX2-NEXT: .cfi_offset %r14, -32 -; AVX2-NEXT: .cfi_offset %r15, -24 -; AVX2-NEXT: .cfi_offset %rbp, -16 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 ; AVX2-NEXT: vpextrq $1, %xmm4, %rax ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -3629,43 +3466,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; AVX2-NEXT: addq $8, %rsp -; AVX2-NEXT: .cfi_def_cfa_offset 56 ; AVX2-NEXT: popq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: popq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: popq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: popq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: popq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: popq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 8 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_ext_v8i64: ; AVX512: # %bb.0: ; AVX512-NEXT: pushq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: pushq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: pushq %r14 
-; AVX512-NEXT: .cfi_def_cfa_offset 32 ; AVX512-NEXT: pushq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: pushq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 56 ; AVX512-NEXT: pushq %rax -; AVX512-NEXT: .cfi_def_cfa_offset 64 -; AVX512-NEXT: .cfi_offset %rbx, -56 -; AVX512-NEXT: .cfi_offset %r12, -48 -; AVX512-NEXT: .cfi_offset %r13, -40 -; AVX512-NEXT: .cfi_offset %r14, -32 -; AVX512-NEXT: .cfi_offset %r15, -24 -; AVX512-NEXT: .cfi_offset %rbp, -16 ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX512-NEXT: vpextrq $1, %xmm3, %rax @@ -3796,19 +3613,12 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512-NEXT: addq $8, %rsp -; AVX512-NEXT: .cfi_def_cfa_offset 56 ; AVX512-NEXT: popq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: popq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: popq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 32 ; AVX512-NEXT: popq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: popq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: popq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 8 ; AVX512-NEXT: retq %x0 = sext <8 x i64> %a0 to <8 x i128> %x1 = sext <8 x i64> %a1 to <8 x i128> diff --git a/llvm/test/CodeGen/X86/avgceilu.ll b/llvm/test/CodeGen/X86/avgceilu.ll index dee1a5a720f90..d34894cc0fbb3 100644 --- a/llvm/test/CodeGen/X86/avgceilu.ll +++ b/llvm/test/CodeGen/X86/avgceilu.ll @@ -9,7 +9,7 @@ ; 128-bit vectors ; -define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) { +define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: pavgb %xmm1, %xmm0 @@ -26,7 +26,7 @@ define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res } -define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) { +define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; SSE-LABEL: test_ext_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: pavgb %xmm1, %xmm0 @@ -45,7 +45,7 @@ define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res } -define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) { +define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: pavgw %xmm1, %xmm0 @@ -62,7 +62,7 @@ define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } -define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) { +define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind { ; SSE-LABEL: test_ext_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: pavgw %xmm1, %xmm0 @@ -81,7 +81,7 @@ define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } -define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) { +define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -106,7 +106,7 @@ define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) { ret <4 x i32> %res } -define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) { +define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm3, %xmm3 @@ -195,7 +195,7 @@ define <4 x i32> @test_ext_v4i32(<4 x i32> 
%a0, <4 x i32> %a1) { ret <4 x i32> %res } -define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) { +define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_fixed_v2i64: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -220,7 +220,7 @@ define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) { ret <2 x i64> %res } -define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) { +define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] @@ -310,7 +310,7 @@ define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) { ; 256-bit vectors ; -define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) { +define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v32i8: ; SSE: # %bb.0: ; SSE-NEXT: pavgb %xmm2, %xmm0 @@ -342,7 +342,7 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) { ret <32 x i8> %res } -define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) { +define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE-LABEL: test_ext_v32i8: ; SSE: # %bb.0: ; SSE-NEXT: pavgb %xmm2, %xmm0 @@ -376,7 +376,7 @@ define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) { ret <32 x i8> %res } -define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) { +define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i16: ; SSE: # %bb.0: ; SSE-NEXT: pavgw %xmm2, %xmm0 @@ -408,7 +408,7 @@ define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) { ret <16 x i16> %res } -define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) { +define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; SSE-LABEL: test_ext_v16i16: ; SSE: # %bb.0: ; SSE-NEXT: pavgw %xmm2, %xmm0 @@ -442,7 +442,7 @@ define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) { ret <16 x i16> %res } -define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) { +define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v8i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -494,7 +494,7 @@ define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) { ret <8 x i32> %res } -define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) { +define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm4 @@ -629,7 +629,7 @@ define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) { ret <8 x i32> %res } -define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) { +define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; SSE-LABEL: test_fixed_v4i64: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -681,7 +681,7 @@ define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ret <4 x i64> %res } -define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { +define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v4i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3] @@ -937,7 +937,7 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; 512-bit vectors ; -define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) { +define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE-LABEL: 
test_fixed_v64i8: ; SSE: # %bb.0: ; SSE-NEXT: pavgb %xmm4, %xmm0 @@ -977,7 +977,7 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ret <64 x i8> %res } -define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) { +define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE-LABEL: test_ext_v64i8: ; SSE: # %bb.0: ; SSE-NEXT: pavgb %xmm4, %xmm0 @@ -1019,7 +1019,7 @@ define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ret <64 x i8> %res } -define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) { +define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v32i16: ; SSE: # %bb.0: ; SSE-NEXT: pavgw %xmm4, %xmm0 @@ -1059,7 +1059,7 @@ define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) { ret <32 x i16> %res } -define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) { +define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE-LABEL: test_ext_v32i16: ; SSE: # %bb.0: ; SSE-NEXT: pavgw %xmm4, %xmm0 @@ -1101,7 +1101,7 @@ define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) { ret <32 x i16> %res } -define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) { +define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm8 @@ -1177,7 +1177,7 @@ define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) { ret <16 x i32> %res } -define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) { +define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v16i32: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm2, %xmm8 @@ -1413,7 +1413,7 @@ define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) { ret <16 x i32> %res } -define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) { +define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; SSE-LABEL: test_fixed_v8i64: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm8 @@ -1489,27 +1489,15 @@ define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ret <8 x i64> %res } -define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { +define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: pushq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: pushq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: pushq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: pushq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 56 -; SSE2-NEXT: .cfi_offset %rbx, -56 -; SSE2-NEXT: .cfi_offset %r12, -48 -; SSE2-NEXT: .cfi_offset %r13, -40 -; SSE2-NEXT: .cfi_offset %r14, -32 -; SSE2-NEXT: .cfi_offset %r15, -24 -; SSE2-NEXT: .cfi_offset %rbp, -16 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3] ; SSE2-NEXT: movq %xmm8, %rcx ; SSE2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -1617,39 +1605,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] ; SSE2-NEXT: popq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: popq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: popq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: popq %r14 -; SSE2-NEXT: 
.cfi_def_cfa_offset 24 ; SSE2-NEXT: popq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: popq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 8 ; SSE2-NEXT: retq ; ; SSE4-LABEL: test_ext_v8i64: ; SSE4: # %bb.0: ; SSE4-NEXT: pushq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: pushq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: pushq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: pushq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: pushq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: pushq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 56 -; SSE4-NEXT: .cfi_offset %rbx, -56 -; SSE4-NEXT: .cfi_offset %r12, -48 -; SSE4-NEXT: .cfi_offset %r13, -40 -; SSE4-NEXT: .cfi_offset %r14, -32 -; SSE4-NEXT: .cfi_offset %r15, -24 -; SSE4-NEXT: .cfi_offset %rbp, -16 ; SSE4-NEXT: movq %xmm3, %rcx ; SSE4-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; SSE4-NEXT: movq %xmm7, %rdx @@ -1747,39 +1717,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0] ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0] ; SSE4-NEXT: popq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: popq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: popq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: popq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: popq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: popq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 8 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test_ext_v8i64: ; AVX1: # %bb.0: ; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: pushq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: pushq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: pushq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: pushq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 56 -; AVX1-NEXT: .cfi_offset %rbx, -56 -; AVX1-NEXT: .cfi_offset %r12, -48 -; AVX1-NEXT: .cfi_offset %r13, -40 -; AVX1-NEXT: .cfi_offset %r14, -32 -; AVX1-NEXT: .cfi_offset %r15, -24 -; AVX1-NEXT: .cfi_offset %rbp, -16 ; AVX1-NEXT: vmovq %xmm1, %rcx ; AVX1-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX1-NEXT: vmovq %xmm3, %rdx @@ -1885,39 +1837,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: popq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: popq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: popq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: popq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: popq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: popq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_ext_v8i64: ; AVX2: # %bb.0: ; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: pushq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: pushq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: pushq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: pushq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 56 -; AVX2-NEXT: .cfi_offset %rbx, -56 -; AVX2-NEXT: .cfi_offset %r12, -48 -; AVX2-NEXT: .cfi_offset %r13, -40 -; AVX2-NEXT: .cfi_offset %r14, -32 -; AVX2-NEXT: .cfi_offset %r15, -24 -; AVX2-NEXT: .cfi_offset %rbp, -16 ; AVX2-NEXT: vmovq %xmm1, %rcx ; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; 
AVX2-NEXT: vmovq %xmm3, %rdx @@ -2023,39 +1957,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0] ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; AVX2-NEXT: popq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: popq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: popq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: popq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: popq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: popq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 8 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_ext_v8i64: ; AVX512: # %bb.0: ; AVX512-NEXT: pushq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: pushq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: pushq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 32 ; AVX512-NEXT: pushq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: pushq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 56 -; AVX512-NEXT: .cfi_offset %rbx, -56 -; AVX512-NEXT: .cfi_offset %r12, -48 -; AVX512-NEXT: .cfi_offset %r13, -40 -; AVX512-NEXT: .cfi_offset %r14, -32 -; AVX512-NEXT: .cfi_offset %r15, -24 -; AVX512-NEXT: .cfi_offset %rbp, -16 ; AVX512-NEXT: vmovq %xmm0, %rcx ; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; AVX512-NEXT: vmovq %xmm1, %rdx @@ -2164,17 +2080,11 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512-NEXT: popq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: popq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: popq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 32 ; AVX512-NEXT: popq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: popq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: popq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 8 ; AVX512-NEXT: retq %x0 = zext <8 x i64> %a0 to <8 x i128> %x1 = zext <8 x i64> %a1 to <8 x i128> diff --git a/llvm/test/CodeGen/X86/avgfloors.ll b/llvm/test/CodeGen/X86/avgfloors.ll index a3864ab4bb44e..efee831a15c77 100644 --- a/llvm/test/CodeGen/X86/avgfloors.ll +++ b/llvm/test/CodeGen/X86/avgfloors.ll @@ -9,7 +9,7 @@ ; 128-bit vectors ; -define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) { +define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i8: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -64,7 +64,7 @@ define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res } -define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) { +define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind { ; SSE2-LABEL: test_ext_v16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] @@ -150,7 +150,7 @@ define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res } -define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) { +define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v8i16: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -174,7 +174,7 @@ define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } -define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) { +define <8 x i16> @test_ext_v8i16(<8 x i16> 
%a0, <8 x i16> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i16: ; SSE2: # %bb.0: ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] @@ -259,7 +259,7 @@ define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } -define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) { +define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 @@ -283,7 +283,7 @@ define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) { ret <4 x i32> %res } -define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) { +define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v4i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm2, %xmm2 @@ -365,7 +365,7 @@ define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) { ret <4 x i32> %res } -define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) { +define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE2-LABEL: test_fixed_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm2 @@ -425,7 +425,7 @@ define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) { ret <2 x i64> %res } -define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) { +define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v2i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] @@ -514,7 +514,7 @@ define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) { ; 256-bit vectors ; -define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) { +define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v32i8: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -587,7 +587,7 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) { ret <32 x i8> %res } -define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) { +define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind { ; SSE2-LABEL: test_ext_v32i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] @@ -723,7 +723,7 @@ define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) { ret <32 x i8> %res } -define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) { +define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i16: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -773,7 +773,7 @@ define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) { ret <16 x i16> %res } -define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) { +define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind { ; SSE2-LABEL: test_ext_v16i16: ; SSE2: # %bb.0: ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] @@ -908,7 +908,7 @@ define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) { ret <16 x i16> %res } -define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) { +define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v8i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm4 @@ -958,7 +958,7 @@ define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) { ret <8 x i32> %res } -define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, 
<8 x i32> %a1) { +define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm4, %xmm4 @@ -1089,7 +1089,7 @@ define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) { ret <8 x i32> %res } -define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) { +define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; SSE2-LABEL: test_fixed_v4i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm1, %xmm4 @@ -1173,27 +1173,15 @@ define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ret <4 x i64> %res } -define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { +define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v4i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: pushq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: pushq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: pushq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: pushq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 56 -; SSE2-NEXT: .cfi_offset %rbx, -56 -; SSE2-NEXT: .cfi_offset %r12, -48 -; SSE2-NEXT: .cfi_offset %r13, -40 -; SSE2-NEXT: .cfi_offset %r14, -32 -; SSE2-NEXT: .cfi_offset %r15, -24 -; SSE2-NEXT: .cfi_offset %rbp, -16 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3] ; SSE2-NEXT: movq %xmm4, %rdx ; SSE2-NEXT: movq %rdx, %r14 @@ -1241,39 +1229,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE2-NEXT: popq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: popq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: popq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: popq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: popq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: popq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 8 ; SSE2-NEXT: retq ; ; SSE4-LABEL: test_ext_v4i64: ; SSE4: # %bb.0: ; SSE4-NEXT: pushq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: pushq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: pushq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: pushq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: pushq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: pushq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 56 -; SSE4-NEXT: .cfi_offset %rbx, -56 -; SSE4-NEXT: .cfi_offset %r12, -48 -; SSE4-NEXT: .cfi_offset %r13, -40 -; SSE4-NEXT: .cfi_offset %r14, -32 -; SSE4-NEXT: .cfi_offset %r15, -24 -; SSE4-NEXT: .cfi_offset %rbp, -16 ; SSE4-NEXT: movq %xmm1, %rdi ; SSE4-NEXT: movq %rdi, %r14 ; SSE4-NEXT: sarq $63, %r14 @@ -1317,39 +1287,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE4-NEXT: popq %rbx -; SSE4-NEXT: .cfi_def_cfa_offset 48 ; SSE4-NEXT: popq %r12 -; SSE4-NEXT: .cfi_def_cfa_offset 40 ; SSE4-NEXT: popq %r13 -; SSE4-NEXT: .cfi_def_cfa_offset 32 ; SSE4-NEXT: popq %r14 -; SSE4-NEXT: .cfi_def_cfa_offset 24 ; SSE4-NEXT: popq %r15 -; SSE4-NEXT: .cfi_def_cfa_offset 16 ; SSE4-NEXT: popq %rbp -; SSE4-NEXT: .cfi_def_cfa_offset 8 ; SSE4-NEXT: retq ; ; AVX1-LABEL: test_ext_v4i64: ; AVX1: # %bb.0: ; AVX1-NEXT: pushq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: pushq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: 
pushq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: pushq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: pushq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: pushq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 56 -; AVX1-NEXT: .cfi_offset %rbx, -56 -; AVX1-NEXT: .cfi_offset %r12, -48 -; AVX1-NEXT: .cfi_offset %r13, -40 -; AVX1-NEXT: .cfi_offset %r14, -32 -; AVX1-NEXT: .cfi_offset %r15, -24 -; AVX1-NEXT: .cfi_offset %rbp, -16 ; AVX1-NEXT: vmovq %xmm0, %rdx ; AVX1-NEXT: movq %rdx, %r14 ; AVX1-NEXT: sarq $63, %r14 @@ -1396,39 +1348,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: popq %rbx -; AVX1-NEXT: .cfi_def_cfa_offset 48 ; AVX1-NEXT: popq %r12 -; AVX1-NEXT: .cfi_def_cfa_offset 40 ; AVX1-NEXT: popq %r13 -; AVX1-NEXT: .cfi_def_cfa_offset 32 ; AVX1-NEXT: popq %r14 -; AVX1-NEXT: .cfi_def_cfa_offset 24 ; AVX1-NEXT: popq %r15 -; AVX1-NEXT: .cfi_def_cfa_offset 16 ; AVX1-NEXT: popq %rbp -; AVX1-NEXT: .cfi_def_cfa_offset 8 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_ext_v4i64: ; AVX2: # %bb.0: ; AVX2-NEXT: pushq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: pushq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: pushq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: pushq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: pushq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: pushq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 56 -; AVX2-NEXT: .cfi_offset %rbx, -56 -; AVX2-NEXT: .cfi_offset %r12, -48 -; AVX2-NEXT: .cfi_offset %r13, -40 -; AVX2-NEXT: .cfi_offset %r14, -32 -; AVX2-NEXT: .cfi_offset %r15, -24 -; AVX2-NEXT: .cfi_offset %rbp, -16 ; AVX2-NEXT: vmovq %xmm0, %rdx ; AVX2-NEXT: movq %rdx, %r14 ; AVX2-NEXT: sarq $63, %r14 @@ -1475,39 +1409,21 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: popq %rbx -; AVX2-NEXT: .cfi_def_cfa_offset 48 ; AVX2-NEXT: popq %r12 -; AVX2-NEXT: .cfi_def_cfa_offset 40 ; AVX2-NEXT: popq %r13 -; AVX2-NEXT: .cfi_def_cfa_offset 32 ; AVX2-NEXT: popq %r14 -; AVX2-NEXT: .cfi_def_cfa_offset 24 ; AVX2-NEXT: popq %r15 -; AVX2-NEXT: .cfi_def_cfa_offset 16 ; AVX2-NEXT: popq %rbp -; AVX2-NEXT: .cfi_def_cfa_offset 8 ; AVX2-NEXT: retq ; ; AVX512-LABEL: test_ext_v4i64: ; AVX512: # %bb.0: ; AVX512-NEXT: pushq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: pushq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: pushq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 32 ; AVX512-NEXT: pushq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: pushq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 56 -; AVX512-NEXT: .cfi_offset %rbx, -56 -; AVX512-NEXT: .cfi_offset %r12, -48 -; AVX512-NEXT: .cfi_offset %r13, -40 -; AVX512-NEXT: .cfi_offset %r14, -32 -; AVX512-NEXT: .cfi_offset %r15, -24 -; AVX512-NEXT: .cfi_offset %rbp, -16 ; AVX512-NEXT: vmovq %xmm0, %rdx ; AVX512-NEXT: movq %rdx, %r14 ; AVX512-NEXT: sarq $63, %r14 @@ -1554,17 +1470,11 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ; AVX512-NEXT: popq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 48 ; AVX512-NEXT: popq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 40 ; AVX512-NEXT: popq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 32 
; AVX512-NEXT: popq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 24 ; AVX512-NEXT: popq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 16 ; AVX512-NEXT: popq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 8 ; AVX512-NEXT: retq %x0 = sext <4 x i64> %a0 to <4 x i128> %x1 = sext <4 x i64> %a1 to <4 x i128> @@ -1578,7 +1488,7 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) { ; 512-bit vectors ; -define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) { +define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE-LABEL: test_fixed_v64i8: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm10 @@ -1690,7 +1600,7 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ret <64 x i8> %res } -define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) { +define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind { ; SSE2-LABEL: test_ext_v64i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklbw {{.*#+}} xmm13 = xmm13[0],xmm3[0],xmm13[1],xmm3[1],xmm13[2],xmm3[2],xmm13[3],xmm3[3],xmm13[4],xmm3[4],xmm13[5],xmm3[5],xmm13[6],xmm3[6],xmm13[7],xmm3[7] @@ -1934,7 +1844,7 @@ define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ret <64 x i8> %res } -define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) { +define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE-LABEL: test_fixed_v32i16: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm8 @@ -2007,7 +1917,7 @@ define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) { ret <32 x i16> %res } -define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) { +define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind { ; SSE2-LABEL: test_ext_v32i16: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm3, %xmm9 @@ -2251,7 +2161,7 @@ define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) { ret <32 x i16> %res } -define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) { +define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; SSE-LABEL: test_fixed_v16i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm3, %xmm8 @@ -2324,7 +2234,7 @@ define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) { ret <16 x i32> %res } -define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) { +define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; SSE2-LABEL: test_ext_v16i32: ; SSE2: # %bb.0: ; SSE2-NEXT: pxor %xmm8, %xmm8 @@ -2561,7 +2471,7 @@ define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) { ret <16 x i32> %res } -define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) { +define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; SSE2-LABEL: test_fixed_v8i64: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm3, %xmm11 @@ -2698,29 +2608,16 @@ define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) { ret <8 x i64> %res } -define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) { +define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; SSE2-LABEL: test_ext_v8i64: ; SSE2: # %bb.0: ; SSE2-NEXT: pushq %rbp -; SSE2-NEXT: .cfi_def_cfa_offset 16 ; SSE2-NEXT: pushq %r15 -; SSE2-NEXT: .cfi_def_cfa_offset 24 ; SSE2-NEXT: pushq %r14 -; SSE2-NEXT: .cfi_def_cfa_offset 32 ; SSE2-NEXT: pushq %r13 -; SSE2-NEXT: .cfi_def_cfa_offset 40 ; SSE2-NEXT: pushq %r12 -; SSE2-NEXT: .cfi_def_cfa_offset 48 ; SSE2-NEXT: pushq %rbx -; SSE2-NEXT: .cfi_def_cfa_offset 56 ; SSE2-NEXT: pushq %rax -; SSE2-NEXT: .cfi_def_cfa_offset 64 -; SSE2-NEXT: 
.cfi_offset %rbx, -56
-; SSE2-NEXT: .cfi_offset %r12, -48
-; SSE2-NEXT: .cfi_offset %r13, -40
-; SSE2-NEXT: .cfi_offset %r14, -32
-; SSE2-NEXT: .cfi_offset %r15, -24
-; SSE2-NEXT: .cfi_offset %rbp, -16
 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3]
 ; SSE2-NEXT: movq %xmm8, %rax
 ; SSE2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -2832,43 +2729,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0]
 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
 ; SSE2-NEXT: addq $8, %rsp
-; SSE2-NEXT: .cfi_def_cfa_offset 56
 ; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: .cfi_def_cfa_offset 48
 ; SSE2-NEXT: popq %r12
-; SSE2-NEXT: .cfi_def_cfa_offset 40
 ; SSE2-NEXT: popq %r13
-; SSE2-NEXT: .cfi_def_cfa_offset 32
 ; SSE2-NEXT: popq %r14
-; SSE2-NEXT: .cfi_def_cfa_offset 24
 ; SSE2-NEXT: popq %r15
-; SSE2-NEXT: .cfi_def_cfa_offset 16
 ; SSE2-NEXT: popq %rbp
-; SSE2-NEXT: .cfi_def_cfa_offset 8
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: test_ext_v8i64:
 ; SSE4: # %bb.0:
 ; SSE4-NEXT: pushq %rbp
-; SSE4-NEXT: .cfi_def_cfa_offset 16
 ; SSE4-NEXT: pushq %r15
-; SSE4-NEXT: .cfi_def_cfa_offset 24
 ; SSE4-NEXT: pushq %r14
-; SSE4-NEXT: .cfi_def_cfa_offset 32
 ; SSE4-NEXT: pushq %r13
-; SSE4-NEXT: .cfi_def_cfa_offset 40
 ; SSE4-NEXT: pushq %r12
-; SSE4-NEXT: .cfi_def_cfa_offset 48
 ; SSE4-NEXT: pushq %rbx
-; SSE4-NEXT: .cfi_def_cfa_offset 56
 ; SSE4-NEXT: pushq %rax
-; SSE4-NEXT: .cfi_def_cfa_offset 64
-; SSE4-NEXT: .cfi_offset %rbx, -56
-; SSE4-NEXT: .cfi_offset %r12, -48
-; SSE4-NEXT: .cfi_offset %r13, -40
-; SSE4-NEXT: .cfi_offset %r14, -32
-; SSE4-NEXT: .cfi_offset %r15, -24
-; SSE4-NEXT: .cfi_offset %rbp, -16
 ; SSE4-NEXT: movq %xmm3, %rax
 ; SSE4-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; SSE4-NEXT: movq %rax, %rcx
@@ -2972,43 +2849,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0]
 ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
 ; SSE4-NEXT: addq $8, %rsp
-; SSE4-NEXT: .cfi_def_cfa_offset 56
 ; SSE4-NEXT: popq %rbx
-; SSE4-NEXT: .cfi_def_cfa_offset 48
 ; SSE4-NEXT: popq %r12
-; SSE4-NEXT: .cfi_def_cfa_offset 40
 ; SSE4-NEXT: popq %r13
-; SSE4-NEXT: .cfi_def_cfa_offset 32
 ; SSE4-NEXT: popq %r14
-; SSE4-NEXT: .cfi_def_cfa_offset 24
 ; SSE4-NEXT: popq %r15
-; SSE4-NEXT: .cfi_def_cfa_offset 16
 ; SSE4-NEXT: popq %rbp
-; SSE4-NEXT: .cfi_def_cfa_offset 8
 ; SSE4-NEXT: retq
 ;
 ; AVX1-LABEL: test_ext_v8i64:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: .cfi_def_cfa_offset 16
 ; AVX1-NEXT: pushq %r15
-; AVX1-NEXT: .cfi_def_cfa_offset 24
 ; AVX1-NEXT: pushq %r14
-; AVX1-NEXT: .cfi_def_cfa_offset 32
 ; AVX1-NEXT: pushq %r13
-; AVX1-NEXT: .cfi_def_cfa_offset 40
 ; AVX1-NEXT: pushq %r12
-; AVX1-NEXT: .cfi_def_cfa_offset 48
 ; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: .cfi_def_cfa_offset 56
 ; AVX1-NEXT: pushq %rax
-; AVX1-NEXT: .cfi_def_cfa_offset 64
-; AVX1-NEXT: .cfi_offset %rbx, -56
-; AVX1-NEXT: .cfi_offset %r12, -48
-; AVX1-NEXT: .cfi_offset %r13, -40
-; AVX1-NEXT: .cfi_offset %r14, -32
-; AVX1-NEXT: .cfi_offset %r15, -24
-; AVX1-NEXT: .cfi_offset %rbp, -16
 ; AVX1-NEXT: vmovq %xmm1, %rax
 ; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; AVX1-NEXT: movq %rax, %rcx
@@ -3118,43 +2975,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: addq $8, %rsp
-; AVX1-NEXT: .cfi_def_cfa_offset 56
 ; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: .cfi_def_cfa_offset 48
 ; AVX1-NEXT: popq %r12
-; AVX1-NEXT: .cfi_def_cfa_offset 40
 ; AVX1-NEXT: popq %r13
-; AVX1-NEXT: .cfi_def_cfa_offset 32
 ; AVX1-NEXT: popq %r14
-; AVX1-NEXT: .cfi_def_cfa_offset 24
 ; AVX1-NEXT: popq %r15
-; AVX1-NEXT: .cfi_def_cfa_offset 16
 ; AVX1-NEXT: popq %rbp
-; AVX1-NEXT: .cfi_def_cfa_offset 8
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_ext_v8i64:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: .cfi_def_cfa_offset 16
 ; AVX2-NEXT: pushq %r15
-; AVX2-NEXT: .cfi_def_cfa_offset 24
 ; AVX2-NEXT: pushq %r14
-; AVX2-NEXT: .cfi_def_cfa_offset 32
 ; AVX2-NEXT: pushq %r13
-; AVX2-NEXT: .cfi_def_cfa_offset 40
 ; AVX2-NEXT: pushq %r12
-; AVX2-NEXT: .cfi_def_cfa_offset 48
 ; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: .cfi_def_cfa_offset 56
 ; AVX2-NEXT: pushq %rax
-; AVX2-NEXT: .cfi_def_cfa_offset 64
-; AVX2-NEXT: .cfi_offset %rbx, -56
-; AVX2-NEXT: .cfi_offset %r12, -48
-; AVX2-NEXT: .cfi_offset %r13, -40
-; AVX2-NEXT: .cfi_offset %r14, -32
-; AVX2-NEXT: .cfi_offset %r15, -24
-; AVX2-NEXT: .cfi_offset %rbp, -16
 ; AVX2-NEXT: vmovq %xmm1, %rax
 ; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; AVX2-NEXT: movq %rax, %rcx
@@ -3264,43 +3101,23 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0]
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
 ; AVX2-NEXT: addq $8, %rsp
-; AVX2-NEXT: .cfi_def_cfa_offset 56
 ; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: .cfi_def_cfa_offset 48
 ; AVX2-NEXT: popq %r12
-; AVX2-NEXT: .cfi_def_cfa_offset 40
 ; AVX2-NEXT: popq %r13
-; AVX2-NEXT: .cfi_def_cfa_offset 32
 ; AVX2-NEXT: popq %r14
-; AVX2-NEXT: .cfi_def_cfa_offset 24
 ; AVX2-NEXT: popq %r15
-; AVX2-NEXT: .cfi_def_cfa_offset 16
 ; AVX2-NEXT: popq %rbp
-; AVX2-NEXT: .cfi_def_cfa_offset 8
 ; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: test_ext_v8i64:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: pushq %rbp
-; AVX512-NEXT: .cfi_def_cfa_offset 16
 ; AVX512-NEXT: pushq %r15
-; AVX512-NEXT: .cfi_def_cfa_offset 24
 ; AVX512-NEXT: pushq %r14
-; AVX512-NEXT: .cfi_def_cfa_offset 32
 ; AVX512-NEXT: pushq %r13
-; AVX512-NEXT: .cfi_def_cfa_offset 40
 ; AVX512-NEXT: pushq %r12
-; AVX512-NEXT: .cfi_def_cfa_offset 48
 ; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: .cfi_def_cfa_offset 56
 ; AVX512-NEXT: pushq %rax
-; AVX512-NEXT: .cfi_def_cfa_offset 64
-; AVX512-NEXT: .cfi_offset %rbx, -56
-; AVX512-NEXT: .cfi_offset %r12, -48
-; AVX512-NEXT: .cfi_offset %r13, -40
-; AVX512-NEXT: .cfi_offset %r14, -32
-; AVX512-NEXT: .cfi_offset %r15, -24
-; AVX512-NEXT: .cfi_offset %rbp, -16
 ; AVX512-NEXT: vmovq %xmm0, %rax
 ; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; AVX512-NEXT: movq %rax, %rcx
@@ -3413,19 +3230,12 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512-NEXT: addq $8, %rsp
-; AVX512-NEXT: .cfi_def_cfa_offset 56
 ; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: .cfi_def_cfa_offset 48
 ; AVX512-NEXT: popq %r12
-; AVX512-NEXT: .cfi_def_cfa_offset 40
 ; AVX512-NEXT: popq %r13
-; AVX512-NEXT: .cfi_def_cfa_offset 32
 ; AVX512-NEXT: popq %r14
-; AVX512-NEXT: .cfi_def_cfa_offset 24
 ; AVX512-NEXT: popq %r15
-; AVX512-NEXT: .cfi_def_cfa_offset 16
 ; AVX512-NEXT: popq %rbp
-; AVX512-NEXT: .cfi_def_cfa_offset 8
 ; AVX512-NEXT: retq
 %x0 = sext <8 x i64> %a0 to <8 x i128>
 %x1 = sext <8 x i64> %a1 to <8 x i128>
diff --git a/llvm/test/CodeGen/X86/avgflooru.ll b/llvm/test/CodeGen/X86/avgflooru.ll
index e07c1f55991e8..000457c5ab1e6 100644
--- a/llvm/test/CodeGen/X86/avgflooru.ll
+++ b/llvm/test/CodeGen/X86/avgflooru.ll
@@ -9,7 +9,7 @@
 ; 128-bit vectors
 ;
-define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
+define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v16i8:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -53,7 +53,7 @@ define <16 x i8> @test_fixed_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
 ret <16 x i8> %res
 }
-define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
+define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v16i8:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -127,7 +127,7 @@ define <16 x i8> @test_ext_v16i8(<16 x i8> %a0, <16 x i8> %a1) {
 ret <16 x i8> %res
 }
-define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v8i16:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -151,7 +151,7 @@ define <8 x i16> @test_fixed_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
 ret <8 x i16> %res
 }
-define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
+define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v8i16:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -227,7 +227,7 @@ define <8 x i16> @test_ext_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
 ret <8 x i16> %res
 }
-define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
+define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v4i32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -251,7 +251,7 @@ define <4 x i32> @test_fixed_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
 ret <4 x i32> %res
 }
-define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
+define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v4i32:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm2, %xmm2
@@ -325,7 +325,7 @@ define <4 x i32> @test_ext_v4i32(<4 x i32> %a0, <4 x i32> %a1) {
 ret <4 x i32> %res
 }
-define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v2i64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm2
@@ -349,7 +349,7 @@ define <2 x i64> @test_fixed_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
 ret <2 x i64> %res
 }
-define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
+define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v2i64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
@@ -458,7 +458,7 @@ define <2 x i64> @test_ext_v2i64(<2 x i64> %a0, <2 x i64> %a1) {
 ; 256-bit vectors
 ;
-define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
+define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v32i8:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm1, %xmm4
@@ -516,7 +516,7 @@ define <32 x i8> @test_fixed_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
 ret <32 x i8> %res
 }
-define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
+define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v32i8:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm4, %xmm4
@@ -627,7 +627,7 @@ define <32 x i8> @test_ext_v32i8(<32 x i8> %a0, <32 x i8> %a1) {
 ret <32 x i8> %res
 }
-define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
+define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v16i16:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm1, %xmm4
@@ -677,7 +677,7 @@ define <16 x i16> @test_fixed_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
 ret <16 x i16> %res
 }
-define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
+define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v16i16:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm4, %xmm4
@@ -792,7 +792,7 @@ define <16 x i16> @test_ext_v16i16(<16 x i16> %a0, <16 x i16> %a1) {
 ret <16 x i16> %res
 }
-define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
+define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v8i32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm1, %xmm4
@@ -842,7 +842,7 @@ define <8 x i32> @test_fixed_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
 ret <8 x i32> %res
 }
-define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
+define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v8i32:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm4, %xmm4
@@ -954,7 +954,7 @@ define <8 x i32> @test_ext_v8i32(<8 x i32> %a0, <8 x i32> %a1) {
 ret <8 x i32> %res
 }
-define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
+define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v4i64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm1, %xmm4
@@ -1004,7 +1004,7 @@ define <4 x i64> @test_fixed_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
 ret <4 x i64> %res
 }
-define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
+define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v4i64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[2,3,2,3]
@@ -1199,7 +1199,7 @@ define <4 x i64> @test_ext_v4i64(<4 x i64> %a0, <4 x i64> %a1) {
 ; 512-bit vectors
 ;
-define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
+define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v64i8:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm3, %xmm9
@@ -1286,7 +1286,7 @@ define <64 x i8> @test_fixed_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
 ret <64 x i8> %res
 }
-define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
+define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v64i8:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm8, %xmm8
@@ -1481,7 +1481,7 @@ define <64 x i8> @test_ext_v64i8(<64 x i8> %a0, <64 x i8> %a1) {
 ret <64 x i8> %res
 }
-define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) {
+define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v32i16:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm3, %xmm8
@@ -1554,7 +1554,7 @@ define <32 x i16> @test_fixed_v32i16(<32 x i16> %a0, <32 x i16> %a1) {
 ret <32 x i16> %res
 }
-define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) {
+define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v32i16:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm8, %xmm8
@@ -1757,7 +1757,7 @@ define <32 x i16> @test_ext_v32i16(<32 x i16> %a0, <32 x i16> %a1) {
 ret <32 x i16> %res
 }
-define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) {
+define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v16i32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm3, %xmm8
@@ -1830,7 +1830,7 @@ define <16 x i32> @test_fixed_v16i32(<16 x i32> %a0, <16 x i32> %a1) {
 ret <16 x i32> %res
 }
-define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) {
+define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v16i32:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pxor %xmm8, %xmm8
@@ -2027,7 +2027,7 @@ define <16 x i32> @test_ext_v16i32(<16 x i32> %a0, <16 x i32> %a1) {
 ret <16 x i32> %res
 }
-define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
+define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
 ; SSE-LABEL: test_fixed_v8i64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm3, %xmm8
@@ -2100,27 +2100,15 @@ define <8 x i64> @test_fixed_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ret <8 x i64> %res
 }
-define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
+define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind {
 ; SSE2-LABEL: test_ext_v8i64:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: pushq %rbp
-; SSE2-NEXT: .cfi_def_cfa_offset 16
 ; SSE2-NEXT: pushq %r15
-; SSE2-NEXT: .cfi_def_cfa_offset 24
 ; SSE2-NEXT: pushq %r14
-; SSE2-NEXT: .cfi_def_cfa_offset 32
 ; SSE2-NEXT: pushq %r13
-; SSE2-NEXT: .cfi_def_cfa_offset 40
 ; SSE2-NEXT: pushq %r12
-; SSE2-NEXT: .cfi_def_cfa_offset 48
 ; SSE2-NEXT: pushq %rbx
-; SSE2-NEXT: .cfi_def_cfa_offset 56
-; SSE2-NEXT: .cfi_offset %rbx, -56
-; SSE2-NEXT: .cfi_offset %r12, -48
-; SSE2-NEXT: .cfi_offset %r13, -40
-; SSE2-NEXT: .cfi_offset %r14, -32
-; SSE2-NEXT: .cfi_offset %r15, -24
-; SSE2-NEXT: .cfi_offset %rbp, -16
 ; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm3[2,3,2,3]
 ; SSE2-NEXT: movq %xmm3, %rbx
 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3]
@@ -2194,39 +2182,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0]
 ; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
 ; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: .cfi_def_cfa_offset 48
 ; SSE2-NEXT: popq %r12
-; SSE2-NEXT: .cfi_def_cfa_offset 40
 ; SSE2-NEXT: popq %r13
-; SSE2-NEXT: .cfi_def_cfa_offset 32
 ; SSE2-NEXT: popq %r14
-; SSE2-NEXT: .cfi_def_cfa_offset 24
 ; SSE2-NEXT: popq %r15
-; SSE2-NEXT: .cfi_def_cfa_offset 16
 ; SSE2-NEXT: popq %rbp
-; SSE2-NEXT: .cfi_def_cfa_offset 8
 ; SSE2-NEXT: retq
 ;
 ; SSE4-LABEL: test_ext_v8i64:
 ; SSE4: # %bb.0:
 ; SSE4-NEXT: pushq %rbp
-; SSE4-NEXT: .cfi_def_cfa_offset 16
 ; SSE4-NEXT: pushq %r15
-; SSE4-NEXT: .cfi_def_cfa_offset 24
 ; SSE4-NEXT: pushq %r14
-; SSE4-NEXT: .cfi_def_cfa_offset 32
 ; SSE4-NEXT: pushq %r13
-; SSE4-NEXT: .cfi_def_cfa_offset 40
 ; SSE4-NEXT: pushq %r12
-; SSE4-NEXT: .cfi_def_cfa_offset 48
 ; SSE4-NEXT: pushq %rbx
-; SSE4-NEXT: .cfi_def_cfa_offset 56
-; SSE4-NEXT: .cfi_offset %rbx, -56
-; SSE4-NEXT: .cfi_offset %r12, -48
-; SSE4-NEXT: .cfi_offset %r13, -40
-; SSE4-NEXT: .cfi_offset %r14, -32
-; SSE4-NEXT: .cfi_offset %r15, -24
-; SSE4-NEXT: .cfi_offset %rbp, -16
 ; SSE4-NEXT: pextrq $1, %xmm3, %r14
 ; SSE4-NEXT: movq %xmm2, %r13
 ; SSE4-NEXT: pextrq $1, %xmm2, %rbp
@@ -2292,39 +2262,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm6[0]
 ; SSE4-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm7[0]
 ; SSE4-NEXT: popq %rbx
-; SSE4-NEXT: .cfi_def_cfa_offset 48
 ; SSE4-NEXT: popq %r12
-; SSE4-NEXT: .cfi_def_cfa_offset 40
 ; SSE4-NEXT: popq %r13
-; SSE4-NEXT: .cfi_def_cfa_offset 32
 ; SSE4-NEXT: popq %r14
-; SSE4-NEXT: .cfi_def_cfa_offset 24
 ; SSE4-NEXT: popq %r15
-; SSE4-NEXT: .cfi_def_cfa_offset 16
 ; SSE4-NEXT: popq %rbp
-; SSE4-NEXT: .cfi_def_cfa_offset 8
 ; SSE4-NEXT: retq
 ;
 ; AVX1-LABEL: test_ext_v8i64:
 ; AVX1: # %bb.0:
 ; AVX1-NEXT: pushq %rbp
-; AVX1-NEXT: .cfi_def_cfa_offset 16
 ; AVX1-NEXT: pushq %r15
-; AVX1-NEXT: .cfi_def_cfa_offset 24
 ; AVX1-NEXT: pushq %r14
-; AVX1-NEXT: .cfi_def_cfa_offset 32
 ; AVX1-NEXT: pushq %r13
-; AVX1-NEXT: .cfi_def_cfa_offset 40
 ; AVX1-NEXT: pushq %r12
-; AVX1-NEXT: .cfi_def_cfa_offset 48
 ; AVX1-NEXT: pushq %rbx
-; AVX1-NEXT: .cfi_def_cfa_offset 56
-; AVX1-NEXT: .cfi_offset %rbx, -56
-; AVX1-NEXT: .cfi_offset %r12, -48
-; AVX1-NEXT: .cfi_offset %r13, -40
-; AVX1-NEXT: .cfi_offset %r14, -32
-; AVX1-NEXT: .cfi_offset %r15, -24
-; AVX1-NEXT: .cfi_offset %rbp, -16
 ; AVX1-NEXT: vpextrq $1, %xmm1, %rbx
 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
 ; AVX1-NEXT: vmovq %xmm4, %r15
@@ -2396,39 +2348,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0]
 ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX1-NEXT: popq %rbx
-; AVX1-NEXT: .cfi_def_cfa_offset 48
 ; AVX1-NEXT: popq %r12
-; AVX1-NEXT: .cfi_def_cfa_offset 40
 ; AVX1-NEXT: popq %r13
-; AVX1-NEXT: .cfi_def_cfa_offset 32
 ; AVX1-NEXT: popq %r14
-; AVX1-NEXT: .cfi_def_cfa_offset 24
 ; AVX1-NEXT: popq %r15
-; AVX1-NEXT: .cfi_def_cfa_offset 16
 ; AVX1-NEXT: popq %rbp
-; AVX1-NEXT: .cfi_def_cfa_offset 8
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test_ext_v8i64:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: pushq %rbp
-; AVX2-NEXT: .cfi_def_cfa_offset 16
 ; AVX2-NEXT: pushq %r15
-; AVX2-NEXT: .cfi_def_cfa_offset 24
 ; AVX2-NEXT: pushq %r14
-; AVX2-NEXT: .cfi_def_cfa_offset 32
 ; AVX2-NEXT: pushq %r13
-; AVX2-NEXT: .cfi_def_cfa_offset 40
 ; AVX2-NEXT: pushq %r12
-; AVX2-NEXT: .cfi_def_cfa_offset 48
 ; AVX2-NEXT: pushq %rbx
-; AVX2-NEXT: .cfi_def_cfa_offset 56
-; AVX2-NEXT: .cfi_offset %rbx, -56
-; AVX2-NEXT: .cfi_offset %r12, -48
-; AVX2-NEXT: .cfi_offset %r13, -40
-; AVX2-NEXT: .cfi_offset %r14, -32
-; AVX2-NEXT: .cfi_offset %r15, -24
-; AVX2-NEXT: .cfi_offset %rbp, -16
 ; AVX2-NEXT: vpextrq $1, %xmm1, %rbx
 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm4
 ; AVX2-NEXT: vmovq %xmm4, %r15
@@ -2500,39 +2434,21 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm7[0],xmm6[0]
 ; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
 ; AVX2-NEXT: popq %rbx
-; AVX2-NEXT: .cfi_def_cfa_offset 48
 ; AVX2-NEXT: popq %r12
-; AVX2-NEXT: .cfi_def_cfa_offset 40
 ; AVX2-NEXT: popq %r13
-; AVX2-NEXT: .cfi_def_cfa_offset 32
 ; AVX2-NEXT: popq %r14
-; AVX2-NEXT: .cfi_def_cfa_offset 24
 ; AVX2-NEXT: popq %r15
-; AVX2-NEXT: .cfi_def_cfa_offset 16
 ; AVX2-NEXT: popq %rbp
-; AVX2-NEXT: .cfi_def_cfa_offset 8
 ; AVX2-NEXT: retq
 ;
 ; AVX512-LABEL: test_ext_v8i64:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: pushq %rbp
-; AVX512-NEXT: .cfi_def_cfa_offset 16
 ; AVX512-NEXT: pushq %r15
-; AVX512-NEXT: .cfi_def_cfa_offset 24
 ; AVX512-NEXT: pushq %r14
-; AVX512-NEXT: .cfi_def_cfa_offset 32
 ; AVX512-NEXT: pushq %r13
-; AVX512-NEXT: .cfi_def_cfa_offset 40
 ; AVX512-NEXT: pushq %r12
-; AVX512-NEXT: .cfi_def_cfa_offset 48
 ; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: .cfi_def_cfa_offset 56
-; AVX512-NEXT: .cfi_offset %rbx, -56
-; AVX512-NEXT: .cfi_offset %r12, -48
-; AVX512-NEXT: .cfi_offset %r13, -40
-; AVX512-NEXT: .cfi_offset %r14, -32
-; AVX512-NEXT: .cfi_offset %r15, -24
-; AVX512-NEXT: .cfi_offset %rbp, -16
 ; AVX512-NEXT: vpextrq $1, %xmm0, %r10
 ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
 ; AVX512-NEXT: vpextrq $1, %xmm2, %r13
@@ -2607,17 +2523,11 @@ define <8 x i64> @test_ext_v8i64(<8 x i64> %a0, <8 x i64> %a1) {
 ; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
 ; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: .cfi_def_cfa_offset 48
 ; AVX512-NEXT: popq %r12
-; AVX512-NEXT: .cfi_def_cfa_offset 40
 ; AVX512-NEXT: popq %r13
-; AVX512-NEXT: .cfi_def_cfa_offset 32
 ; AVX512-NEXT: popq %r14
-; AVX512-NEXT: .cfi_def_cfa_offset 24
 ; AVX512-NEXT: popq %r15
-; AVX512-NEXT: .cfi_def_cfa_offset 16
 ; AVX512-NEXT: popq %rbp
-; AVX512-NEXT: .cfi_def_cfa_offset 8
 ; AVX512-NEXT: retq
 %x0 = zext <8 x i64> %a0 to <8 x i128>
 %x1 = zext <8 x i64> %a1 to <8 x i128>