Skip to content

Commit

Permalink
[X86][FP16] Add the missing legal action for EXTRACT_SUBVECTOR
Browse files Browse the repository at this point in the history
Fixes #57340

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D132563

(cherry picked from commit 12b203e)
  • Loading branch information
phoebewang authored and tru committed Aug 25, 2022
1 parent 9c29291 commit 3d3ef9d
Show file tree
Hide file tree
Showing 4 changed files with 337 additions and 34 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -1521,7 +1521,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
- MVT::v4f32, MVT::v2f64 }) {
+ MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
}

Expand Down Expand Up @@ -1861,7 +1861,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (result) is 256-bit but the source is 512-bit wide.
// 128-bit was made Legal under AVX1.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
- MVT::v8f32, MVT::v4f64 })
+ MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);

for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
Expand Down
26 changes: 10 additions & 16 deletions llvm/test/CodeGen/X86/avx512-f16c-v16f16-fadd.ll
Expand Up @@ -4,23 +4,17 @@
define <16 x half> @foo(<16 x half> %a, <16 x half> %b) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
- ; CHECK-NEXT: pushq %rbp
- ; CHECK-NEXT: movq %rsp, %rbp
- ; CHECK-NEXT: andq $-32, %rsp
- ; CHECK-NEXT: subq $96, %rsp
- ; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
- ; CHECK-NEXT: vmovaps %ymm0, (%rsp)
- ; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm0
- ; CHECK-NEXT: vcvtph2ps (%rsp), %ymm1
- ; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+ ; CHECK-NEXT: vcvtph2ps %xmm1, %ymm2
+ ; CHECK-NEXT: vcvtph2ps %xmm0, %ymm3
+ ; CHECK-NEXT: vaddps %ymm2, %ymm3, %ymm2
+ ; CHECK-NEXT: vcvtps2ph $4, %ymm2, %xmm2
+ ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1
+ ; CHECK-NEXT: vcvtph2ps %xmm1, %ymm1
+ ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+ ; CHECK-NEXT: vcvtph2ps %xmm0, %ymm0
+ ; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vcvtps2ph $4, %ymm0, %xmm0
- ; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm1
- ; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %ymm2
- ; CHECK-NEXT: vaddps %ymm1, %ymm2, %ymm1
- ; CHECK-NEXT: vcvtps2ph $4, %ymm1, %xmm1
- ; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
- ; CHECK-NEXT: movq %rbp, %rsp
- ; CHECK-NEXT: popq %rbp
+ ; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; CHECK-NEXT: retq
%1 = fadd <16 x half> %a, %b
ret <16 x half> %1
Expand Down
26 changes: 10 additions & 16 deletions llvm/test/CodeGen/X86/avx512-skx-v32f16-fadd.ll
Expand Up @@ -4,23 +4,17 @@
define <32 x half> @foo(<32 x half> %a, <32 x half> %b) nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
- ; CHECK-NEXT: pushq %rbp
- ; CHECK-NEXT: movq %rsp, %rbp
- ; CHECK-NEXT: andq $-64, %rsp
- ; CHECK-NEXT: subq $192, %rsp
- ; CHECK-NEXT: vmovaps %zmm1, {{[0-9]+}}(%rsp)
- ; CHECK-NEXT: vmovaps %zmm0, (%rsp)
- ; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm0
- ; CHECK-NEXT: vcvtph2ps (%rsp), %zmm1
- ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+ ; CHECK-NEXT: vcvtph2ps %ymm1, %zmm2
+ ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm3
+ ; CHECK-NEXT: vaddps %zmm2, %zmm3, %zmm2
+ ; CHECK-NEXT: vcvtps2ph $4, %zmm2, %ymm2
+ ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
+ ; CHECK-NEXT: vcvtph2ps %ymm1, %zmm1
+ ; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+ ; CHECK-NEXT: vcvtph2ps %ymm0, %zmm0
+ ; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: vcvtps2ph $4, %zmm0, %ymm0
- ; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm1
- ; CHECK-NEXT: vcvtph2ps {{[0-9]+}}(%rsp), %zmm2
- ; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
- ; CHECK-NEXT: vcvtps2ph $4, %zmm1, %ymm1
- ; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
- ; CHECK-NEXT: movq %rbp, %rsp
- ; CHECK-NEXT: popq %rbp
+ ; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = fadd <32 x half> %a, %b
ret <32 x half> %1
Expand Down

0 comments on commit 3d3ef9d

Please sign in to comment.