Skip to content

Commit

Permalink
[AArch64] Use custom lowering for {U,S}INT_TO_FP with i8.
Browse files Browse the repository at this point in the history
With fullfp16, it is cheaper to cast the {U,S}INT_TO_FP operand to i16
first, rather than promoting it to i32. The custom lowering for
{U,S}INT_TO_FP already supports that; it just needs to be used.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D113601
  • Loading branch information
fhahn committed Nov 11, 2021
1 parent 53dc525 commit c2ed9fd
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 64 deletions.
12 changes: 8 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -1015,10 +1015,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);

// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
Expand All @@ -1031,13 +1027,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);

if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// When AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
Expand Down
148 changes: 88 additions & 60 deletions llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
Expand Up @@ -414,41 +414,55 @@ define <4 x half> @sitofp_v4i8(<4 x i8> %a) #0 {
}

define <8 x half> @sitofp_v8i8(<8 x i8> %a) #0 {
; CHECK-LABEL: sitofp_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: scvtf v1.4s, v1.4s
; CHECK-NEXT: scvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v1.4h, v1.4s
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
; CHECK-CVT-LABEL: sitofp_v8i8:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT: sshll2 v1.4s, v0.8h, #0
; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: sitofp_v8i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
%1 = sitofp <8 x i8> %a to <8 x half>
ret <8 x half> %1
}

define <16 x half> @sitofp_v16i8(<16 x i8> %a) #0 {
; CHECK-LABEL: sitofp_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
; CHECK-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0
; CHECK-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-NEXT: scvtf v2.4s, v2.4s
; CHECK-NEXT: scvtf v1.4s, v1.4s
; CHECK-NEXT: scvtf v3.4s, v3.4s
; CHECK-NEXT: scvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v2.4h, v2.4s
; CHECK-NEXT: fcvtn v1.4h, v1.4s
; CHECK-NEXT: fcvtn v3.4h, v3.4s
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: mov v1.d[1], v2.d[0]
; CHECK-NEXT: mov v0.d[1], v3.d[0]
; CHECK-NEXT: ret
; CHECK-CVT-LABEL: sitofp_v16i8:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: sshll2 v1.8h, v0.16b, #0
; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT: sshll2 v2.4s, v1.8h, #0
; CHECK-CVT-NEXT: sshll v1.4s, v1.4h, #0
; CHECK-CVT-NEXT: sshll2 v3.4s, v0.8h, #0
; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s
; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s
; CHECK-CVT-NEXT: scvtf v3.4s, v3.4s
; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0]
; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0]
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: sitofp_v16i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: sshll2 v1.8h, v0.16b, #0
; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-FP16-NEXT: scvtf v1.8h, v1.8h
; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
%1 = sitofp <16 x i8> %a to <16 x half>
ret <16 x half> %1
}
Expand Down Expand Up @@ -525,41 +539,55 @@ define <4 x half> @uitofp_v4i8(<4 x i8> %a) #0 {
}

define <8 x half> @uitofp_v8i8(<8 x i8> %a) #0 {
; CHECK-LABEL: uitofp_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ucvtf v1.4s, v1.4s
; CHECK-NEXT: ucvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v1.4h, v1.4s
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
; CHECK-CVT-LABEL: uitofp_v8i8:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT: ushll2 v1.4s, v0.8h, #0
; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-NEXT: mov v0.d[1], v1.d[0]
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: uitofp_v8i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
%1 = uitofp <8 x i8> %a to <8 x half>
ret <8 x half> %1
}

define <16 x half> @uitofp_v16i8(<16 x i8> %a) #0 {
; CHECK-LABEL: uitofp_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: ucvtf v2.4s, v2.4s
; CHECK-NEXT: ucvtf v1.4s, v1.4s
; CHECK-NEXT: ucvtf v3.4s, v3.4s
; CHECK-NEXT: ucvtf v0.4s, v0.4s
; CHECK-NEXT: fcvtn v2.4h, v2.4s
; CHECK-NEXT: fcvtn v1.4h, v1.4s
; CHECK-NEXT: fcvtn v3.4h, v3.4s
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: mov v1.d[1], v2.d[0]
; CHECK-NEXT: mov v0.d[1], v3.d[0]
; CHECK-NEXT: ret
; CHECK-CVT-LABEL: uitofp_v16i8:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-CVT-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-CVT-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-CVT-NEXT: ushll2 v3.4s, v0.8h, #0
; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s
; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s
; CHECK-CVT-NEXT: ucvtf v3.4s, v3.4s
; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s
; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s
; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s
; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s
; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0]
; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0]
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: uitofp_v16i8:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-FP16-NEXT: ucvtf v1.8h, v1.8h
; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h
; CHECK-FP16-NEXT: ret
%1 = uitofp <16 x i8> %a to <16 x half>
ret <16 x half> %1
}
Expand Down

0 comments on commit c2ed9fd

Please sign in to comment.