[X86] Type legalize v2f32 stores by widening to v4f32, casting to v2f64, extracting f64 and storing.

Summary: This is similar to what D52528 did for loads. It should match what generic type legalization does in 64-bit mode where it uses a v2i64 cast and an i64 store.
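As a rough illustration of why the widen/cast/extract sequence is byte-exact, here is a minimal standalone C++ sketch (not LLVM code; the function name and layout are invented for illustration): the two floats are widened with undefined padding, the low 128 bits are reinterpreted as two doubles, and only the first double — exactly the original 8 bytes — is stored.

// Scalar model of the new v2f32 store lowering (illustrative only).
#include <cassert>
#include <cstring>

// Mirrors: widen v2f32 -> v4f32, bitcast to v2f64, extract element 0,
// store the f64. The upper two lanes model the "undef" padding.
void storeV2F32ViaF64(const float Src[2], unsigned char Dst[8]) {
  float Widened[4] = {Src[0], Src[1], 0.0f, 0.0f}; // CONCAT_VECTORS w/ undef
  double AsF64[2];
  std::memcpy(AsF64, Widened, sizeof(AsF64)); // BITCAST v4f32 -> v2f64
  double Elt0 = AsF64[0];                     // EXTRACT_VECTOR_ELT index 0
  std::memcpy(Dst, &Elt0, sizeof(Elt0));      // one 64-bit store (movlps)
}

int main() {
  float In[2] = {1.5f, -2.25f};
  unsigned char A[8], B[8];
  storeV2F32ViaF64(In, A);
  std::memcpy(B, In, 8);             // reference: the two 32-bit stores it replaces
  assert(std::memcmp(A, B, 8) == 0); // same 8 bytes either way
  return 0;
}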

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D53173

llvm-svn: 344470
topperc committed Oct 14, 2018
1 parent 72e5d93 commit ec4b75f
Showing 5 changed files with 67 additions and 77 deletions.
47 changes: 34 additions & 13 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -902,8 +902,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
 
     // We want to legalize this to an f64 load rather than an i64 load on
-    // 64-bit targets and two 32-bit loads on a 32-bit target.
+    // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
+    // store.
     setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
+    setOperationAction(ISD::STORE, MVT::v2f32, Custom);
 
     setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
     setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
@@ -19943,18 +19945,36 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
   SDValue StoredVal = St->getValue();
 
   // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
-  assert(StoredVal.getValueType().isVector() &&
-         StoredVal.getValueType().getVectorElementType() == MVT::i1 &&
-         StoredVal.getValueType().getVectorNumElements() <= 8 &&
-         "Unexpected VT");
-  assert(!St->isTruncatingStore() && "Expected non-truncating store");
-  assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
-         "Expected AVX512F without AVX512DQI");
-
-  StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
-                          DAG.getUNDEF(MVT::v8i1), StoredVal,
+  if (StoredVal.getValueType().isVector() &&
+      StoredVal.getValueType().getVectorElementType() == MVT::i1) {
+    assert(StoredVal.getValueType().getVectorNumElements() <= 8 &&
+           "Unexpected VT");
+    assert(!St->isTruncatingStore() && "Expected non-truncating store");
+    assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
+           "Expected AVX512F without AVX512DQI");
+
+    StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1,
+                            DAG.getUNDEF(MVT::v8i1), StoredVal,
+                            DAG.getIntPtrConstant(0, dl));
+    StoredVal = DAG.getBitcast(MVT::i8, StoredVal);
+
+    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
+                        St->getPointerInfo(), St->getAlignment(),
+                        St->getMemOperand()->getFlags());
+  }
+
+  if (St->isTruncatingStore())
+    return SDValue();
+
+  assert(StoredVal.getValueType() == MVT::v2f32 && "Unexpected VT");
+
+  // Widen the vector, cast to a v2x64 type, extract the single 64-bit
+  // element and store it.
+  StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, StoredVal,
+                          DAG.getUNDEF(MVT::v2f32));
+  StoredVal = DAG.getBitcast(MVT::v2f64, StoredVal);
+  StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, StoredVal,
                           DAG.getIntPtrConstant(0, dl));
-  StoredVal = DAG.getBitcast(MVT::i8, StoredVal);
 
   return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                       St->getPointerInfo(), St->getAlignment(),
@@ -36912,7 +36932,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
   // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
   // pair instead.
   if (Subtarget.is64Bit() || F64IsLegal) {
-    MVT LdVT = Subtarget.is64Bit() ? MVT::i64 : MVT::f64;
+    MVT LdVT = (Subtarget.is64Bit() &&
+                (!VT.isFloatingPoint() || !F64IsLegal)) ? MVT::i64 : MVT::f64;
     SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
                                 Ld->getMemOperand());

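The combineStore hunk above also changes which scalar type carries the 64-bit load/store pair. A hedged restatement of the new selection logic in plain C++ — not LLVM code; Is64Bit, IsFP, and F64IsLegal stand in for Subtarget.is64Bit(), VT.isFloatingPoint(), and the F64IsLegal flag computed in the surrounding code:

// Illustrative restatement of the new LdVT choice in combineStore.
enum class UnitVT { I64, F64 };

UnitVT pickUnitVT(bool Is64Bit, bool IsFP, bool F64IsLegal) {
  // Previously any 64-bit target used i64. Now a 64-bit target keeps
  // floating-point data in f64 when f64 SSE ops are legal, avoiding a
  // round trip through a general-purpose register.
  if (Is64Bit && (!IsFP || !F64IsLegal))
    return UnitVT::I64;
  return UnitVT::F64; // f64 load/store pair
}

// e.g. pickUnitVT(true, true, true)  -> UnitVT::F64 (new behavior)
//      pickUnitVT(true, false, true) -> UnitVT::I64 (unchanged)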
16 changes: 5 additions & 11 deletions llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -8,8 +8,7 @@
define void @simple_widen(<2 x float> %a, <2 x float> %b) {
; X32-LABEL: simple_widen:
; X32: # %bb.0: # %entry
-; X32-NEXT: extractps $1, %xmm1, (%eax)
-; X32-NEXT: movss %xmm1, (%eax)
+; X32-NEXT: movlps %xmm1, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: simple_widen:
@@ -28,8 +27,7 @@ define void @complex_inreg_work(<2 x float> %a, <2 x float> %b) {
; X32-NEXT: movaps %xmm0, %xmm2
; X32-NEXT: cmpordps %xmm0, %xmm0
; X32-NEXT: blendvps %xmm0, %xmm2, %xmm1
-; X32-NEXT: extractps $1, %xmm1, (%eax)
-; X32-NEXT: movss %xmm1, (%eax)
+; X32-NEXT: movlps %xmm1, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: complex_inreg_work:
@@ -50,8 +48,7 @@ define void @zero_test() {
; X32-LABEL: zero_test:
; X32: # %bb.0: # %entry
; X32-NEXT: xorps %xmm0, %xmm0
-; X32-NEXT: extractps $1, %xmm0, (%eax)
-; X32-NEXT: movss %xmm0, (%eax)
+; X32-NEXT: movlps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: zero_test:
@@ -82,11 +79,8 @@ define void @full_test() {
; X32-NEXT: cmpeqps %xmm2, %xmm1
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: blendvps %xmm0, %xmm2, %xmm4
-; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
-; X32-NEXT: movshdup {{.*#+}} xmm0 = xmm4[1,1,3,3]
-; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
-; X32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT: movlps %xmm4, {{[0-9]+}}(%esp)
+; X32-NEXT: movlps %xmm4, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
44 changes: 17 additions & 27 deletions llvm/test/CodeGen/X86/sse-schedule.ll
@@ -2712,8 +2712,7 @@ define <4 x float> @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2)
; GENERIC: # %bb.0:
; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
-; GENERIC-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2723,135 +2722,126 @@ define <4 x float> @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2)
; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00]
; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] sched: [1:1.00]
-; ATOM-NEXT: movlps %xmm2, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: movhps %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movhps:
; SLM: # %bb.0:
; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
-; SLM-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movhps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
-; SANDY-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movhps:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movhps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movhps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movhps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movhps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movhps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movhps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movhps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
-; SKX-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movhps:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movhps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movhps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:0.50]
; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movhps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50]
; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = bitcast x86_mmx* %a2 to <2 x float>*
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/vec_fptrunc.ll
@@ -10,17 +10,15 @@ define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT: cvtpd2ps (%ecx), %xmm0
-; X32-SSE-NEXT: extractps $1, %xmm0, 4(%eax)
-; X32-SSE-NEXT: movss %xmm0, (%eax)
+; X32-SSE-NEXT: movlpd %xmm0, (%eax)
; X32-SSE-NEXT: retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX: # %bb.0: # %entry
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT: vcvtpd2psx (%ecx), %xmm0
-; X32-AVX-NEXT: vextractps $1, %xmm0, 4(%eax)
-; X32-AVX-NEXT: vmovss %xmm0, (%eax)
+; X32-AVX-NEXT: vmovlpd %xmm0, (%eax)
; X32-AVX-NEXT: retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
31 changes: 9 additions & 22 deletions llvm/test/CodeGen/X86/widen_conv-3.ll
@@ -7,28 +7,15 @@
; sign to float v2i16 to v2f32

define void @convert_v2i16_to_v2f32(<2 x float>* %dst.addr, <2 x i16> %src) nounwind {
-; X86-SSE2-LABEL: convert_v2i16_to_v2f32:
-; X86-SSE2: # %bb.0: # %entry
-; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT: psllq $48, %xmm0
-; X86-SSE2-NEXT: psrad $16, %xmm0
-; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
-; X86-SSE2-NEXT: movss %xmm0, (%eax)
-; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; X86-SSE2-NEXT: movss %xmm0, 4(%eax)
-; X86-SSE2-NEXT: retl
-;
-; X86-SSE42-LABEL: convert_v2i16_to_v2f32:
-; X86-SSE42: # %bb.0: # %entry
-; X86-SSE42-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE42-NEXT: psllq $48, %xmm0
-; X86-SSE42-NEXT: psrad $16, %xmm0
-; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
-; X86-SSE42-NEXT: cvtdq2ps %xmm0, %xmm0
-; X86-SSE42-NEXT: extractps $1, %xmm0, 4(%eax)
-; X86-SSE42-NEXT: movss %xmm0, (%eax)
-; X86-SSE42-NEXT: retl
+; X86-LABEL: convert_v2i16_to_v2f32:
+; X86: # %bb.0: # %entry
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: psllq $48, %xmm0
+; X86-NEXT: psrad $16, %xmm0
+; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
+; X86-NEXT: cvtdq2ps %xmm0, %xmm0
+; X86-NEXT: movlps %xmm0, (%eax)
+; X86-NEXT: retl
;
; X64-LABEL: convert_v2i16_to_v2f32:
; X64: # %bb.0: # %entry
