diff --git a/llvm/test/CodeGen/X86/cvt16.ll b/llvm/test/CodeGen/X86/cvt16.ll
index 0a3fa492bebafb..5af60b3c59964b 100644
--- a/llvm/test/CodeGen/X86/cvt16.ll
+++ b/llvm/test/CodeGen/X86/cvt16.ll
@@ -21,17 +21,14 @@
 ; vcvtps2ph instructions
 
 
-define void @test1(float %src, ptr %dest) {
+define void @test1(float %src, ptr %dest) nounwind {
 ; LIBCALL-LABEL: test1:
 ; LIBCALL: # %bb.0:
 ; LIBCALL-NEXT: pushq %rbx
-; LIBCALL-NEXT: .cfi_def_cfa_offset 16
-; LIBCALL-NEXT: .cfi_offset %rbx, -16
 ; LIBCALL-NEXT: movq %rdi, %rbx
 ; LIBCALL-NEXT: callq __truncsfhf2@PLT
 ; LIBCALL-NEXT: pextrw $0, %xmm0, (%rbx)
 ; LIBCALL-NEXT: popq %rbx
-; LIBCALL-NEXT: .cfi_def_cfa_offset 8
 ; LIBCALL-NEXT: retq
 ;
 ; F16C-LABEL: test1:
@@ -44,20 +41,17 @@ define void @test1(float %src, ptr %dest) {
 ; SOFTFLOAT-LABEL: test1:
 ; SOFTFLOAT: # %bb.0:
 ; SOFTFLOAT-NEXT: pushq %rbx
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 16
-; SOFTFLOAT-NEXT: .cfi_offset %rbx, -16
 ; SOFTFLOAT-NEXT: movq %rsi, %rbx
 ; SOFTFLOAT-NEXT: callq __gnu_f2h_ieee@PLT
 ; SOFTFLOAT-NEXT: movw %ax, (%rbx)
 ; SOFTFLOAT-NEXT: popq %rbx
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 8
 ; SOFTFLOAT-NEXT: retq
   %1 = tail call i16 @llvm.convert.to.fp16.f32(float %src)
   store i16 %1, ptr %dest, align 2
   ret void
 }
 
-define float @test2(ptr nocapture %src) {
+define float @test2(ptr nocapture %src) nounwind {
 ; LIBCALL-LABEL: test2:
 ; LIBCALL: # %bb.0:
 ; LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
@@ -73,11 +67,9 @@ define float @test2(ptr nocapture %src) {
 ; SOFTFLOAT-LABEL: test2:
 ; SOFTFLOAT: # %bb.0:
 ; SOFTFLOAT-NEXT: pushq %rax
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 16
 ; SOFTFLOAT-NEXT: movzwl (%rdi), %edi
 ; SOFTFLOAT-NEXT: callq __gnu_h2f_ieee@PLT
 ; SOFTFLOAT-NEXT: popq %rcx
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 8
 ; SOFTFLOAT-NEXT: retq
   %1 = load i16, ptr %src, align 2
   %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
@@ -118,16 +110,14 @@ define float @test3(float %src) nounwind uwtable readnone {
   ret float %2
 }
 
-define double @test4(ptr nocapture %src) {
+define double @test4(ptr nocapture %src) nounwind {
 ; LIBCALL-LABEL: test4:
 ; LIBCALL: # %bb.0:
 ; LIBCALL-NEXT: pushq %rax
-; LIBCALL-NEXT: .cfi_def_cfa_offset 16
 ; LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0
 ; LIBCALL-NEXT: callq __extendhfsf2@PLT
 ; LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
 ; LIBCALL-NEXT: popq %rax
-; LIBCALL-NEXT: .cfi_def_cfa_offset 8
 ; LIBCALL-NEXT: retq
 ;
 ; F16C-LABEL: test4:
@@ -141,29 +131,25 @@ define double @test4(ptr nocapture %src) {
 ; SOFTFLOAT-LABEL: test4:
 ; SOFTFLOAT: # %bb.0:
 ; SOFTFLOAT-NEXT: pushq %rax
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 16
 ; SOFTFLOAT-NEXT: movzwl (%rdi), %edi
 ; SOFTFLOAT-NEXT: callq __gnu_h2f_ieee@PLT
 ; SOFTFLOAT-NEXT: movl %eax, %edi
 ; SOFTFLOAT-NEXT: callq __extendsfdf2@PLT
 ; SOFTFLOAT-NEXT: popq %rcx
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 8
 ; SOFTFLOAT-NEXT: retq
   %1 = load i16, ptr %src, align 2
   %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
   ret double %2
 }
 
-define i16 @test5(double %src) {
+define i16 @test5(double %src) nounwind {
 ; LIBCALL-LABEL: test5:
 ; LIBCALL: # %bb.0:
 ; LIBCALL-NEXT: pushq %rax
-; LIBCALL-NEXT: .cfi_def_cfa_offset 16
 ; LIBCALL-NEXT: callq __truncdfhf2@PLT
 ; LIBCALL-NEXT: pextrw $0, %xmm0, %eax
 ; LIBCALL-NEXT: # kill: def $ax killed $ax killed $eax
 ; LIBCALL-NEXT: popq %rcx
-; LIBCALL-NEXT: .cfi_def_cfa_offset 8
 ; LIBCALL-NEXT: retq
 ;
 ; F16C-LABEL: test5:
@@ -177,10 +163,8 @@ define i16 @test5(double %src) {
 ; SOFTFLOAT-LABEL: test5:
 ; SOFTFLOAT: # %bb.0:
 ; SOFTFLOAT-NEXT: pushq %rax
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 16
 ; SOFTFLOAT-NEXT: callq __truncdfhf2@PLT
 ; SOFTFLOAT-NEXT: popq %rcx
-; SOFTFLOAT-NEXT: .cfi_def_cfa_offset 8
 ; SOFTFLOAT-NEXT: retq
   %val = tail call i16 @llvm.convert.to.fp16.f64(double %src)
   ret i16 %val
diff --git a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
index 2b3891a6fee8f6..1069595449bbd9 100644
--- a/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
+++ b/llvm/test/CodeGen/X86/fastmath-float-half-conversion.ll
@@ -14,12 +14,10 @@ define zeroext i16 @test1_fast(double %d) #0 {
 ; AVX-LABEL: test1_fast:
 ; AVX: # %bb.0: # %entry
 ; AVX-NEXT: pushq %rax
-; AVX-NEXT: .cfi_def_cfa_offset 16
 ; AVX-NEXT: callq __truncdfhf2@PLT
 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT: popq %rcx
-; AVX-NEXT: .cfi_def_cfa_offset 8
 ; AVX-NEXT: retq
 entry:
   %0 = tail call i16 @llvm.convert.to.fp16.f64(double %d)
@@ -30,14 +28,12 @@ define zeroext i16 @test2_fast(x86_fp80 %d) #0 {
 ; ALL-LABEL: test2_fast:
 ; ALL: # %bb.0: # %entry
 ; ALL-NEXT: subq $24, %rsp
-; ALL-NEXT: .cfi_def_cfa_offset 32
 ; ALL-NEXT: fldt {{[0-9]+}}(%rsp)
 ; ALL-NEXT: fstpt (%rsp)
 ; ALL-NEXT: callq __truncxfhf2@PLT
 ; ALL-NEXT: vpextrw $0, %xmm0, %eax
 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
 ; ALL-NEXT: addq $24, %rsp
-; ALL-NEXT: .cfi_def_cfa_offset 8
 ; ALL-NEXT: retq
 entry:
   %0 = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %d)
@@ -56,12 +52,10 @@ define zeroext i16 @test1(double %d) #1 {
 ; AVX-LABEL: test1:
 ; AVX: # %bb.0: # %entry
 ; AVX-NEXT: pushq %rax
-; AVX-NEXT: .cfi_def_cfa_offset 16
 ; AVX-NEXT: callq __truncdfhf2@PLT
 ; AVX-NEXT: vpextrw $0, %xmm0, %eax
 ; AVX-NEXT: # kill: def $ax killed $ax killed $eax
 ; AVX-NEXT: popq %rcx
-; AVX-NEXT: .cfi_def_cfa_offset 8
 ; AVX-NEXT: retq
 entry:
   %0 = tail call i16 @llvm.convert.to.fp16.f64(double %d)
@@ -72,14 +66,12 @@ define zeroext i16 @test2(x86_fp80 %d) #1 {
 ; ALL-LABEL: test2:
 ; ALL: # %bb.0: # %entry
 ; ALL-NEXT: subq $24, %rsp
-; ALL-NEXT: .cfi_def_cfa_offset 32
 ; ALL-NEXT: fldt {{[0-9]+}}(%rsp)
 ; ALL-NEXT: fstpt (%rsp)
 ; ALL-NEXT: callq __truncxfhf2@PLT
 ; ALL-NEXT: vpextrw $0, %xmm0, %eax
 ; ALL-NEXT: # kill: def $ax killed $ax killed $eax
 ; ALL-NEXT: addq $24, %rsp
-; ALL-NEXT: .cfi_def_cfa_offset 8
 ; ALL-NEXT: retq
 entry:
   %0 = tail call i16 @llvm.convert.to.fp16.f80(x86_fp80 %d)
@@ -89,5 +81,5 @@ entry:
 declare i16 @llvm.convert.to.fp16.f64(double)
 declare i16 @llvm.convert.to.fp16.f80(x86_fp80)
 
-attributes #0 = { nounwind readnone uwtable "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone uwtable "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index 776248c79b52b9..23abe9bac5e4de 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX1
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX2
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX2
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX2
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX
 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX512
 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX512
 ; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+fast-variable-perlane-shuffle -verify-machineinstrs | FileCheck %s --check-prefixes=ALL,AVX512
@@ -55,21 +55,13 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) nounwind {
 }
 
 define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_16i16_to_16f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtph2ps %xmm0, %ymm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vcvtph2ps %xmm0, %ymm1
-; AVX1-NEXT: vmovaps %ymm2, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_16i16_to_16f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtph2ps %xmm0, %ymm2
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX2-NEXT: vcvtph2ps %xmm0, %ymm1
-; AVX2-NEXT: vmovaps %ymm2, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: cvt_16i16_to_16f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtph2ps %xmm0, %ymm2
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT: vcvtph2ps %xmm0, %ymm1
+; AVX-NEXT: vmovaps %ymm2, %ymm0
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: cvt_16i16_to_16f32:
 ; AVX512: # %bb.0:
@@ -115,19 +107,12 @@ define <8 x float> @cvt_8i16_to_8f32_constrained(<8 x i16> %a0) nounwind strictf
 declare <8 x float> @llvm.experimental.constrained.fpext.v8f32.v8f16(<8 x half>, metadata) strictfp
 
 define <16 x float> @cvt_16i16_to_16f32_constrained(<16 x i16> %a0) nounwind strictfp {
-; AVX1-LABEL: cvt_16i16_to_16f32_constrained:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vcvtph2ps %xmm1, %ymm1
-; AVX1-NEXT: vcvtph2ps %xmm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_16i16_to_16f32_constrained:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vcvtph2ps %xmm1, %ymm1
-; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: cvt_16i16_to_16f32_constrained:
+; AVX: # %bb.0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vcvtph2ps %xmm1, %ymm1
+; AVX-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: cvt_16i16_to_16f32_constrained:
 ; AVX512: # %bb.0:
@@ -191,17 +176,11 @@ define <8 x float> @load_cvt_8i16_to_8f32(ptr %a0) nounwind {
 }
 
 define <16 x float> @load_cvt_16i16_to_16f32(ptr %a0) nounwind {
-; AVX1-LABEL: load_cvt_16i16_to_16f32:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtph2ps (%rdi), %ymm0
-; AVX1-NEXT: vcvtph2ps 16(%rdi), %ymm1
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_cvt_16i16_to_16f32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtph2ps (%rdi), %ymm0
-; AVX2-NEXT: vcvtph2ps 16(%rdi), %ymm1
-; AVX2-NEXT: retq
+; AVX-LABEL: load_cvt_16i16_to_16f32:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtph2ps (%rdi), %ymm0
+; AVX-NEXT: vcvtph2ps 16(%rdi), %ymm1
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: load_cvt_16i16_to_16f32:
 ; AVX512: # %bb.0:
@@ -302,21 +281,13 @@ define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind {
 }
 
 define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind {
-; AVX1-LABEL: cvt_8i16_to_8f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtph2ps %xmm0, %ymm1
-; AVX1-NEXT: vcvtps2pd %xmm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_8i16_to_8f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtph2ps %xmm0, %ymm1
-; AVX2-NEXT: vcvtps2pd %xmm1, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX2-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX2-NEXT: retq
+; AVX-LABEL: cvt_8i16_to_8f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtph2ps %xmm0, %ymm1
+; AVX-NEXT: vcvtps2pd %xmm1, %ymm0
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: cvt_8i16_to_8f64:
 ; AVX512: # %bb.0:
@@ -354,21 +325,13 @@ define <4 x double> @cvt_4i16_to_4f64_constrained(<4 x i16> %a0) nounwind strict
 declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f16(<4 x half>, metadata) strictfp
 
 define <8 x double> @cvt_8i16_to_8f64_constrained(<8 x i16> %a0) nounwind strictfp {
-; AVX1-LABEL: cvt_8i16_to_8f64_constrained:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtph2ps %xmm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX1-NEXT: vcvtps2pd %xmm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_8i16_to_8f64_constrained:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtph2ps %xmm0, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX2-NEXT: vcvtps2pd %xmm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: cvt_8i16_to_8f64_constrained:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtph2ps %xmm0, %ymm0
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
+; AVX-NEXT: vcvtps2pd %xmm0, %ymm0
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: cvt_8i16_to_8f64_constrained:
 ; AVX512: # %bb.0:
@@ -439,21 +402,13 @@ define <4 x double> @load_cvt_8i16_to_4f64(ptr %a0) nounwind {
 }
 
 define <8 x double> @load_cvt_8i16_to_8f64(ptr %a0) nounwind {
-; AVX1-LABEL: load_cvt_8i16_to_8f64:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtph2ps (%rdi), %ymm1
-; AVX1-NEXT: vcvtps2pd %xmm1, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: load_cvt_8i16_to_8f64:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtph2ps (%rdi), %ymm1
-; AVX2-NEXT: vcvtps2pd %xmm1, %ymm0
-; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX2-NEXT: vcvtps2pd %xmm1, %ymm1
-; AVX2-NEXT: retq
+; AVX-LABEL: load_cvt_8i16_to_8f64:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtph2ps (%rdi), %ymm1
+; AVX-NEXT: vcvtps2pd %xmm1, %ymm0
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX-NEXT: vcvtps2pd %xmm1, %ymm1
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: load_cvt_8i16_to_8f64:
 ; AVX512: # %bb.0:
@@ -526,19 +481,12 @@ define <8 x i16> @cvt_8f32_to_8i16(<8 x float> %a0) nounwind {
 }
 
 define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind {
-; AVX1-LABEL: cvt_16f32_to_16i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; AVX1-NEXT: vcvtps2ph $4, %ymm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_16f32_to_16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtps2ph $4, %ymm0, %xmm0
-; AVX2-NEXT: vcvtps2ph $4, %ymm1, %xmm1
-; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: cvt_16f32_to_16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtps2ph $4, %ymm0, %xmm0
+; AVX-NEXT: vcvtps2ph $4, %ymm1, %xmm1
+; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: cvt_16f32_to_16i16:
 ; AVX512: # %bb.0:
@@ -616,19 +564,12 @@ define void @store_cvt_8f32_to_8i16(<8 x float> %a0, ptr %a1) nounwind {
 }
 
 define void @store_cvt_16f32_to_16i16(<16 x float> %a0, ptr %a1) nounwind {
-; AVX1-LABEL: store_cvt_16f32_to_16i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtps2ph $4, %ymm1, 16(%rdi)
-; AVX1-NEXT: vcvtps2ph $4, %ymm0, (%rdi)
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: store_cvt_16f32_to_16i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtps2ph $4, %ymm1, 16(%rdi)
-; AVX2-NEXT: vcvtps2ph $4, %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: store_cvt_16f32_to_16i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtps2ph $4, %ymm1, 16(%rdi)
+; AVX-NEXT: vcvtps2ph $4, %ymm0, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: store_cvt_16f32_to_16i16:
 ; AVX512: # %bb.0:
@@ -708,25 +649,15 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
 }
 
 define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind {
-; AVX1-LABEL: cvt_8f64_to_8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtpd2ps %ymm1, %xmm1
-; AVX1-NEXT: vcvtps2ph $0, %xmm1, %xmm1
-; AVX1-NEXT: vcvtpd2ps %ymm0, %xmm0
-; AVX1-NEXT: vcvtps2ph $0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_8f64_to_8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtpd2ps %ymm1, %xmm1
-; AVX2-NEXT: vcvtps2ph $0, %xmm1, %xmm1
-; AVX2-NEXT: vcvtpd2ps %ymm0, %xmm0
-; AVX2-NEXT: vcvtps2ph $0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: cvt_8f64_to_8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
+; AVX-NEXT: vcvtps2ph $0, %xmm1, %xmm1
+; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0
+; AVX-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: cvt_8f64_to_8i16:
 ; AVX512: # %bb.0:
@@ -814,27 +745,16 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, ptr %a1) nounwind {
 }
 
 define void @store_cvt_8f64_to_8i16(<8 x double> %a0, ptr %a1) nounwind {
-; AVX1-LABEL: store_cvt_8f64_to_8i16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtpd2ps %ymm1, %xmm1
-; AVX1-NEXT: vcvtps2ph $0, %xmm1, %xmm1
-; AVX1-NEXT: vcvtpd2ps %ymm0, %xmm0
-; AVX1-NEXT: vcvtps2ph $0, %xmm0, %xmm0
-; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX1-NEXT: vmovaps %xmm0, (%rdi)
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: store_cvt_8f64_to_8i16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtpd2ps %ymm1, %xmm1
-; AVX2-NEXT: vcvtps2ph $0, %xmm1, %xmm1
-; AVX2-NEXT: vcvtpd2ps %ymm0, %xmm0
-; AVX2-NEXT: vcvtps2ph $0, %xmm0, %xmm0
-; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX2-NEXT: vmovaps %xmm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: store_cvt_8f64_to_8i16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtpd2ps %ymm1, %xmm1
+; AVX-NEXT: vcvtps2ph $0, %xmm1, %xmm1
+; AVX-NEXT: vcvtpd2ps %ymm0, %xmm0
+; AVX-NEXT: vcvtps2ph $0, %xmm0, %xmm0
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovaps %xmm0, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: store_cvt_8f64_to_8i16:
 ; AVX512: # %bb.0:
@@ -849,23 +769,14 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, ptr %a1) nounwind {
 }
 
 define void @store_cvt_32f32_to_32f16(<32 x float> %a0, ptr %a1) nounwind {
-; AVX1-LABEL: store_cvt_32f32_to_32f16:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vcvtps2ph $4, %ymm3, 48(%rdi)
-; AVX1-NEXT: vcvtps2ph $4, %ymm2, 32(%rdi)
-; AVX1-NEXT: vcvtps2ph $4, %ymm1, 16(%rdi)
-; AVX1-NEXT: vcvtps2ph $4, %ymm0, (%rdi)
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: store_cvt_32f32_to_32f16:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vcvtps2ph $4, %ymm3, 48(%rdi)
-; AVX2-NEXT: vcvtps2ph $4, %ymm2, 32(%rdi)
-; AVX2-NEXT: vcvtps2ph $4, %ymm1, 16(%rdi)
-; AVX2-NEXT: vcvtps2ph $4, %ymm0, (%rdi)
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
+; AVX-LABEL: store_cvt_32f32_to_32f16:
+; AVX: # %bb.0:
+; AVX-NEXT: vcvtps2ph $4, %ymm3, 48(%rdi)
+; AVX-NEXT: vcvtps2ph $4, %ymm2, 32(%rdi)
+; AVX-NEXT: vcvtps2ph $4, %ymm1, 16(%rdi)
+; AVX-NEXT: vcvtps2ph $4, %ymm0, (%rdi)
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: store_cvt_32f32_to_32f16:
 ; AVX512: # %bb.0: