Skip to content

Commit

Permalink
[AArch64][GISel] Additional FPTrunc vector lowering
Browse files Browse the repository at this point in the history
I was attempting to add llvm.vector.reduce.fminimum/fmaximum support for GlobalISel.
In the process I noticed that llvm.vector.reduce.fmin/fmax support was missing and
should be added first. That led to adding additional vector support for
minnum/maxnum, which in turn required handling fptrunc and fpext for
some of the fp16 types. So this patch extends the vector handling for fptrunc,
adding support for f16 types, which are clamped to 4 elements, and scalarizing
the rest.

I went round in circles a little over how smaller-than-legal vectors should be
handled, but this approach is simple and seems to work, even if the result is not always optimal yet.

Differential Revision: https://reviews.llvm.org/D155311
  • Loading branch information
davemgreen committed Jul 18, 2023
1 parent f3dfcc5 commit 74c0bdf
Show file tree
Hide file tree
Showing 4 changed files with 261 additions and 3 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4950,6 +4950,18 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
// G_FPTRUNC: only a request on type index 0 is handled here; any other
// index is reported as UnableToLegalize.
case TargetOpcode::G_FPTRUNC: {
if (TypeIdx != 0)
return UnableToLegalize;
Observer.changingInstr(MI);
// Build the new source type from MoreTy's element count and the element
// type of the current source operand (operand 1), so that source and
// destination are rewritten with the same number of elements.
LLT SrcTy = LLT::fixed_vector(
MoreTy.getNumElements(),
MRI.getType(MI.getOperand(1).getReg()).getElementType());
// NOTE(review): presumably these helpers pad operand 1 up to SrcTy and
// operand 0 up to MoreTy -- confirm against their definitions.
moreElementsVectorSrc(MI, SrcTy, 1);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
default:
return UnableToLegalize;
}
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FPTRUNC)
.legalFor(
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
.clampMaxNumElements(0, s32, 2);
.clampNumElements(0, v4s16, v4s16)
.clampNumElements(0, v2s32, v2s32)
.scalarize(0);

getActionDefinitionsBuilder(G_FPEXT)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
Expand Down
8 changes: 6 additions & 2 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-fptrunc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,12 @@ body: |
; CHECK: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<2 x s16>) = G_FPTRUNC [[COPY]](<2 x s32>)
; CHECK-NEXT: $s0 = COPY [[FPTRUNC]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32)
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(<4 x s16>) = G_FPTRUNC [[BUILD_VECTOR]](<4 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FPTRUNC]](<4 x s16>)
; CHECK-NEXT: $s0 = COPY [[UV2]](<2 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s16>) = G_FPTRUNC %0
Expand Down
239 changes: 239 additions & 0 deletions llvm/test/CodeGen/AArch64/fptrunc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=0 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-eabi -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Scalar fptrunc double -> float. Both llc runs share the same expectations:
; a single fcvt s0, d0 followed by ret.
define float @fptrunc_f64_f32(double %a) {
; CHECK-LABEL: fptrunc_f64_f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt s0, d0
; CHECK-NEXT: ret
entry:
%c = fptrunc double %a to float
ret float %c
}

; Scalar fptrunc double -> half. Both llc runs share the same expectations:
; a single fcvt h0, d0 followed by ret.
define half @fptrunc_f64_f16(double %a) {
; CHECK-LABEL: fptrunc_f64_f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt h0, d0
; CHECK-NEXT: ret
entry:
%c = fptrunc double %a to half
ret half %c
}

; Scalar fptrunc float -> half. Both llc runs share the same expectations:
; a single fcvt h0, s0 followed by ret.
define half @fptrunc_f32_f16(float %a) {
; CHECK-LABEL: fptrunc_f32_f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
entry:
%c = fptrunc float %a to half
ret half %c
}

; Vector fptrunc <2 x double> -> <2 x float>. Both llc runs share the same
; expectations: a single fcvtn v0.2s, v0.2d followed by ret.
define <2 x float> @fptrunc_v2f64_v2f32(<2 x double> %a) {
; CHECK-LABEL: fptrunc_v2f64_v2f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NEXT: ret
entry:
%c = fptrunc <2 x double> %a to <2 x float>
ret <2 x float> %c
}

; Vector fptrunc <3 x double> -> <3 x float>. Expectations differ per run:
; the -global-isel=0 output uses fcvtn + fcvtn2, while the -global-isel=1
; output uses fcvtn for the first two lanes, a scalar fcvt for the third,
; and then a sequence of lane moves.
define <3 x float> @fptrunc_v3f64_v3f32(<3 x double> %a) {
; CHECK-SD-LABEL: fptrunc_v3f64_v3f32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: fcvtn v0.2s, v0.2d
; CHECK-SD-NEXT: fcvtn2 v0.4s, v2.2d
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v3f64_v3f32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: fcvt s1, d2
; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
; CHECK-GI-NEXT: ret
entry:
%c = fptrunc <3 x double> %a to <3 x float>
ret <3 x float> %c
}

; Vector fptrunc <4 x double> -> <4 x float>. Both llc runs share the same
; expectations: fcvtn for the low half and fcvtn2 for the high half.
define <4 x float> @fptrunc_v4f64_v4f32(<4 x double> %a) {
; CHECK-LABEL: fptrunc_v4f64_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtn v0.2s, v0.2d
; CHECK-NEXT: fcvtn2 v0.4s, v1.2d
; CHECK-NEXT: ret
entry:
%c = fptrunc <4 x double> %a to <4 x float>
ret <4 x float> %c
}

; Vector fptrunc <2 x double> -> <2 x half>. Both runs convert each lane with
; a scalar fcvt h, d and insert lane 1; the -global-isel=1 output additionally
; writes lanes 2 and 3 of the result register from lane 0.
define <2 x half> @fptrunc_v2f64_v2f16(<2 x double> %a) {
; CHECK-SD-LABEL: fptrunc_v2f64_v2f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov d1, v0.d[1]
; CHECK-SD-NEXT: fcvt h0, d0
; CHECK-SD-NEXT: fcvt h1, d1
; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v2f64_v2f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d1, v0.d[1]
; CHECK-GI-NEXT: fcvt h0, d0
; CHECK-GI-NEXT: fcvt h1, d1
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]
; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%c = fptrunc <2 x double> %a to <2 x half>
ret <2 x half> %c
}

; Vector fptrunc <3 x double> -> <3 x half>. Both runs use three scalar
; fcvt h, d conversions plus lane inserts; the -global-isel=1 output
; additionally writes lane 3 of the result register from lane 0.
define <3 x half> @fptrunc_v3f64_v3f16(<3 x double> %a) {
; CHECK-SD-LABEL: fptrunc_v3f64_v3f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvt h1, d1
; CHECK-SD-NEXT: fcvt h0, d0
; CHECK-SD-NEXT: mov v0.h[1], v1.h[0]
; CHECK-SD-NEXT: fcvt h1, d2
; CHECK-SD-NEXT: mov v0.h[2], v1.h[0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v3f64_v3f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: fcvt h0, d0
; CHECK-GI-NEXT: fcvt h1, d1
; CHECK-GI-NEXT: fcvt h2, d2
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%c = fptrunc <3 x double> %a to <3 x half>
ret <3 x half> %c
}

; Vector fptrunc <4 x double> -> <4 x half>. Both runs expect four scalar
; fcvt h, d conversions combined with lane inserts; only the instruction
; order and register choices differ between the two runs.
define <4 x half> @fptrunc_v4f64_v4f16(<4 x double> %a) {
; CHECK-SD-LABEL: fptrunc_v4f64_v4f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov d2, v0.d[1]
; CHECK-SD-NEXT: fcvt h0, d0
; CHECK-SD-NEXT: fcvt h2, d2
; CHECK-SD-NEXT: mov v0.h[1], v2.h[0]
; CHECK-SD-NEXT: fcvt h2, d1
; CHECK-SD-NEXT: mov d1, v1.d[1]
; CHECK-SD-NEXT: mov v0.h[2], v2.h[0]
; CHECK-SD-NEXT: fcvt h1, d1
; CHECK-SD-NEXT: mov v0.h[3], v1.h[0]
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v4f64_v4f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov d2, v0.d[1]
; CHECK-GI-NEXT: fcvt h0, d0
; CHECK-GI-NEXT: mov d3, v1.d[1]
; CHECK-GI-NEXT: fcvt h1, d1
; CHECK-GI-NEXT: fcvt h2, d2
; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-GI-NEXT: fcvt h2, d3
; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-GI-NEXT: mov v0.h[3], v2.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%c = fptrunc <4 x double> %a to <4 x half>
ret <4 x half> %c
}

; Vector fptrunc <2 x float> -> <2 x half>. Both runs convert with a single
; fcvtn v0.4h, v0.4s; the -global-isel=1 output surrounds it with lane moves
; on the 4-element source and on the 4-element result.
define <2 x half> @fptrunc_v2f32_v2f16(<2 x float> %a) {
; CHECK-SD-LABEL: fptrunc_v2f32_v2f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v2f32_v2f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT: mov v0.s[2], v0.s[0]
; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: mov v0.h[2], v0.h[0]
; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%c = fptrunc <2 x float> %a to <2 x half>
ret <2 x half> %c
}

; Vector fptrunc <3 x float> -> <3 x half>. Both runs convert with a single
; fcvtn v0.4h, v0.4s; the -global-isel=1 output surrounds it with lane moves
; on the 4-element source and on the 4-element result.
define <3 x half> @fptrunc_v3f32_v3f16(<3 x float> %a) {
; CHECK-SD-LABEL: fptrunc_v3f32_v3f16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptrunc_v3f32_v3f16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: mov s1, v0.s[1]
; CHECK-GI-NEXT: mov s2, v0.s[2]
; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
; CHECK-GI-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NEXT: mov h1, v0.h[1]
; CHECK-GI-NEXT: mov h2, v0.h[2]
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
; CHECK-GI-NEXT: mov v0.h[3], v0.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
entry:
%c = fptrunc <3 x float> %a to <3 x half>
ret <3 x half> %c
}

; Vector fptrunc <4 x float> -> <4 x half>. Both llc runs share the same
; expectations: a single fcvtn v0.4h, v0.4s followed by ret.
define <4 x half> @fptrunc_v4f32_v4f16(<4 x float> %a) {
; CHECK-LABEL: fptrunc_v4f32_v4f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
entry:
%c = fptrunc <4 x float> %a to <4 x half>
ret <4 x half> %c
}

; Vector fptrunc <8 x float> -> <8 x half>. Both llc runs share the same
; expectations: fcvtn for the low half and fcvtn2 for the high half.
define <8 x half> @fptrunc_v8f32_v8f16(<8 x float> %a) {
; CHECK-LABEL: fptrunc_v8f32_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: fcvtn2 v0.8h, v1.4s
; CHECK-NEXT: ret
entry:
%c = fptrunc <8 x float> %a to <8 x half>
ret <8 x half> %c
}

0 comments on commit 74c0bdf

Please sign in to comment.