Skip to content

Commit

Permalink
[AArch64] Allow FP16 vector fixed point converts
Browse files Browse the repository at this point in the history
This extends performFpToIntCombine to work on FP16 vectors (when the
subtarget has full FP16 support) as well as the f32 and f64 vectors it
already supported.

Differential Revision: https://reviews.llvm.org/D113297
  • Loading branch information
davemgreen committed Nov 11, 2021
1 parent b24ec07 commit 703ded8
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 30 deletions.
22 changes: 4 additions & 18 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -13395,7 +13395,8 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,

MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
-if (FloatBits != 32 && FloatBits != 64)
+if (FloatBits != 32 && FloatBits != 64 &&
+(FloatBits != 16 || !Subtarget->hasFullFP16()))
return SDValue();

MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
Expand All @@ -13414,25 +13415,10 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
if (C == -1 || C == 0 || C > Bits)
return SDValue();

-MVT ResTy;
-unsigned NumLanes = Op.getValueType().getVectorNumElements();
-switch (NumLanes) {
-default:
-return SDValue();
-case 2:
-ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
-break;
-case 4:
-ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
-break;
-}
-
-if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
+EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
+if (!DAG.getTargetLoweringInfo().isTypeLegal(ResTy))
return SDValue();

-assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
-"Illegal vector type after legalization");

if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
N->getOpcode() == ISD::FP_TO_UINT_SAT) {
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
Expand Down
16 changes: 4 additions & 12 deletions llvm/test/CodeGen/AArch64/fcvt_combine.ll
Expand Up @@ -228,9 +228,7 @@ define <8 x i16> @test_v8f16(<8 x half> %in) {
;
; CHECK-FP16-LABEL: test_v8f16:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: movi v1.8h, #68, lsl #8
-; CHECK-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
+; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h, #2
; CHECK-FP16-NEXT: ret
%scale = fmul <8 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
%val = fptosi <8 x half> %scale to <8 x i16>
Expand All @@ -251,9 +249,7 @@ define <4 x i16> @test_v4f16(<4 x half> %in) {
;
; CHECK-FP16-LABEL: test_v4f16:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: movi v1.4h, #68, lsl #8
-; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h, #2
; CHECK-FP16-NEXT: ret
%scale = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
%val = fptoui <4 x half> %scale to <4 x i16>
Expand Down Expand Up @@ -580,9 +576,7 @@ define <8 x i16> @test_v8f16_sat(<8 x half> %in) {
;
; CHECK-FP16-LABEL: test_v8f16_sat:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: movi v1.8h, #68, lsl #8
-; CHECK-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
+; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h, #2
; CHECK-FP16-NEXT: ret
%mul.i = fmul <8 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0, half 4.0>
%val = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %mul.i)
Expand All @@ -603,9 +597,7 @@ define <4 x i16> @test_v4f16_sat(<4 x half> %in) {
;
; CHECK-FP16-LABEL: test_v4f16_sat:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: movi v1.4h, #68, lsl #8
-; CHECK-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h, #2
; CHECK-FP16-NEXT: ret
%mul.i = fmul <4 x half> %in, <half 4.0, half 4.0, half 4.0, half 4.0>
%val = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> %mul.i)
Expand Down

0 comments on commit 703ded8

Please sign in to comment.