Skip to content

Commit

Permalink
[AArch64][GlobalISel] G_FMINNUM and G_FMAXNUM vector lowering
Browse files Browse the repository at this point in the history
This extends the legalizer handling of G_FMAXNUM/G_FMINNUM to vector types,
which should be fairly straightforward now that fptrunc and fpext are
working.

Differential Revision: https://reviews.llvm.org/D156171
  • Loading branch information
davemgreen committed Jul 31, 2023
1 parent 559d5bb commit e8e49a3
Show file tree
Hide file tree
Showing 4 changed files with 1,281 additions and 765 deletions.
13 changes: 10 additions & 3 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -919,11 +919,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// TODO: Vector types.
getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));

// TODO: Vector types.
getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
.legalFor({MinFPScalar, s32, s64})
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
.legalIf([=](const LegalityQuery &Query) {
const auto &Ty = Query.Types[0];
return (Ty == v8s16 || Ty == v4s16) && HasFP16;
})
.libcallFor({s128})
.minScalar(0, MinFPScalar);
.minScalarOrElt(0, MinFPScalar)
.clampNumElements(0, v4s16, v8s16)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);

getActionDefinitionsBuilder({G_FMAXIMUM, G_FMINIMUM})
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
Expand Down
159 changes: 96 additions & 63 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-fminnum.mir
Original file line number Diff line number Diff line change
@@ -1,28 +1,33 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=legalizer -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=FP16
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=NO-FP16
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=legalizer -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
...
---
name: s16_legal_with_full_fp16
alignment: 4
body: |
bb.0:
liveins: $h0, $h1
; FP16-LABEL: name: s16_legal_with_full_fp16
; FP16: %a:_(s16) = COPY $h0
; FP16: %b:_(s16) = COPY $h1
; FP16: %minnum:_(s16) = G_FMINNUM %a, %b
; FP16: $h0 = COPY %minnum(s16)
; FP16: RET_ReallyLR implicit $h0
; NO-FP16-LABEL: name: s16_legal_with_full_fp16
; NO-FP16: %a:_(s16) = COPY $h0
; NO-FP16: %b:_(s16) = COPY $h1
; NO-FP16: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
; NO-FP16: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
; NO-FP16: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[FPEXT]], [[FPEXT1]]
; NO-FP16: %minnum:_(s16) = G_FPTRUNC [[FMINNUM]](s32)
; NO-FP16: $h0 = COPY %minnum(s16)
; NO-FP16: RET_ReallyLR implicit $h0
; CHECK-FP16-LABEL: name: s16_legal_with_full_fp16
; CHECK-FP16: liveins: $h0, $h1
; CHECK-FP16-NEXT: {{ $}}
; CHECK-FP16-NEXT: %a:_(s16) = COPY $h0
; CHECK-FP16-NEXT: %b:_(s16) = COPY $h1
; CHECK-FP16-NEXT: %minnum:_(s16) = G_FMINNUM %a, %b
; CHECK-FP16-NEXT: $h0 = COPY %minnum(s16)
; CHECK-FP16-NEXT: RET_ReallyLR implicit $h0
;
; CHECK-NOFP16-LABEL: name: s16_legal_with_full_fp16
; CHECK-NOFP16: liveins: $h0, $h1
; CHECK-NOFP16-NEXT: {{ $}}
; CHECK-NOFP16-NEXT: %a:_(s16) = COPY $h0
; CHECK-NOFP16-NEXT: %b:_(s16) = COPY $h1
; CHECK-NOFP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %a(s16)
; CHECK-NOFP16-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %b(s16)
; CHECK-NOFP16-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[FPEXT]], [[FPEXT1]]
; CHECK-NOFP16-NEXT: %minnum:_(s16) = G_FPTRUNC [[FMINNUM]](s32)
; CHECK-NOFP16-NEXT: $h0 = COPY %minnum(s16)
; CHECK-NOFP16-NEXT: RET_ReallyLR implicit $h0
%a:_(s16) = COPY $h0
%b:_(s16) = COPY $h1
%minnum:_(s16) = G_FMINNUM %a, %b
Expand All @@ -36,18 +41,14 @@ alignment: 4
body: |
bb.0:
liveins: $s0, $s1
; FP16-LABEL: name: s32_legal
; FP16: %a:_(s32) = COPY $s0
; FP16: %b:_(s32) = COPY $s1
; FP16: %minnum:_(s32) = G_FMINNUM %a, %b
; FP16: $s0 = COPY %minnum(s32)
; FP16: RET_ReallyLR implicit $s0
; NO-FP16-LABEL: name: s32_legal
; NO-FP16: %a:_(s32) = COPY $s0
; NO-FP16: %b:_(s32) = COPY $s1
; NO-FP16: %minnum:_(s32) = G_FMINNUM %a, %b
; NO-FP16: $s0 = COPY %minnum(s32)
; NO-FP16: RET_ReallyLR implicit $s0
; CHECK-LABEL: name: s32_legal
; CHECK: liveins: $s0, $s1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s32) = COPY $s0
; CHECK-NEXT: %b:_(s32) = COPY $s1
; CHECK-NEXT: %minnum:_(s32) = G_FMINNUM %a, %b
; CHECK-NEXT: $s0 = COPY %minnum(s32)
; CHECK-NEXT: RET_ReallyLR implicit $s0
%a:_(s32) = COPY $s0
%b:_(s32) = COPY $s1
%minnum:_(s32) = G_FMINNUM %a, %b
Expand All @@ -61,18 +62,14 @@ alignment: 4
body: |
bb.0:
liveins: $d0, $d1
; FP16-LABEL: name: s64_legal
; FP16: %a:_(s64) = COPY $d0
; FP16: %b:_(s64) = COPY $d1
; FP16: %minnum:_(s64) = G_FMINNUM %a, %b
; FP16: $d0 = COPY %minnum(s64)
; FP16: RET_ReallyLR implicit $d0
; NO-FP16-LABEL: name: s64_legal
; NO-FP16: %a:_(s64) = COPY $d0
; NO-FP16: %b:_(s64) = COPY $d1
; NO-FP16: %minnum:_(s64) = G_FMINNUM %a, %b
; NO-FP16: $d0 = COPY %minnum(s64)
; NO-FP16: RET_ReallyLR implicit $d0
; CHECK-LABEL: name: s64_legal
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s64) = COPY $d0
; CHECK-NEXT: %b:_(s64) = COPY $d1
; CHECK-NEXT: %minnum:_(s64) = G_FMINNUM %a, %b
; CHECK-NEXT: $d0 = COPY %minnum(s64)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%a:_(s64) = COPY $d0
%b:_(s64) = COPY $d1
%minnum:_(s64) = G_FMINNUM %a, %b
Expand All @@ -86,30 +83,66 @@ alignment: 4
body: |
bb.0:
liveins: $q0, $q1
; FP16-LABEL: name: s128_libcall
; FP16: %a:_(s128) = COPY $q0
; FP16: %b:_(s128) = COPY $q1
; FP16: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; FP16: $q0 = COPY %a(s128)
; FP16: $q1 = COPY %b(s128)
; FP16: BL &fminl, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $q0, implicit $q1, implicit-def $q0
; FP16: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; FP16: %minnum:_(s128) = COPY $q0
; FP16: $q0 = COPY %minnum(s128)
; FP16: RET_ReallyLR implicit $q0
; NO-FP16-LABEL: name: s128_libcall
; NO-FP16: %a:_(s128) = COPY $q0
; NO-FP16: %b:_(s128) = COPY $q1
; NO-FP16: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; NO-FP16: $q0 = COPY %a(s128)
; NO-FP16: $q1 = COPY %b(s128)
; NO-FP16: BL &fminl, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $q0, implicit $q1, implicit-def $q0
; NO-FP16: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; NO-FP16: %minnum:_(s128) = COPY $q0
; NO-FP16: $q0 = COPY %minnum(s128)
; NO-FP16: RET_ReallyLR implicit $q0
; CHECK-LABEL: name: s128_libcall
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(s128) = COPY $q0
; CHECK-NEXT: %b:_(s128) = COPY $q1
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $q0 = COPY %a(s128)
; CHECK-NEXT: $q1 = COPY %b(s128)
; CHECK-NEXT: BL &fminl, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $q0, implicit $q1, implicit-def $q0
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: %minnum:_(s128) = COPY $q0
; CHECK-NEXT: $q0 = COPY %minnum(s128)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%a:_(s128) = COPY $q0
%b:_(s128) = COPY $q1
%minnum:_(s128) = G_FMINNUM %a, %b
$q0 = COPY %minnum(s128)
RET_ReallyLR implicit $q0
...
---
name: v4s32_legal
alignment: 4
body: |
bb.0:
liveins: $q0, $q1
; CHECK-LABEL: name: v4s32_legal
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %a:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: %b:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %minnum:_(<4 x s32>) = G_FMINNUM %a, %b
; CHECK-NEXT: $q0 = COPY %minnum(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%a:_(<4 x s32>) = COPY $q0
%b:_(<4 x s32>) = COPY $q1
%minnum:_(<4 x s32>) = G_FMINNUM %a, %b
$q0 = COPY %minnum(<4 x s32>)
RET_ReallyLR implicit $q0
...
---
name: v3s32_widen
alignment: 4
body: |
bb.1.entry:
liveins: $q0, $q1
%2:_(<2 x s64>) = COPY $q0
%3:_(<4 x s32>) = G_BITCAST %2:_(<2 x s64>)
%4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32) = G_UNMERGE_VALUES %3:_(<4 x s32>)
%0:_(<3 x s32>) = G_BUILD_VECTOR %4:_(s32), %5:_(s32), %6:_(s32)
%8:_(<2 x s64>) = COPY $q1
%9:_(<4 x s32>) = G_BITCAST %8:_(<2 x s64>)
%10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %9:_(<4 x s32>)
%1:_(<3 x s32>) = G_BUILD_VECTOR %10:_(s32), %11:_(s32), %12:_(s32)
%14:_(<3 x s32>) = G_FMINNUM %0:_, %1:_
%15:_(s32), %16:_(s32), %17:_(s32) = G_UNMERGE_VALUES %14:_(<3 x s32>)
%18:_(s32) = G_IMPLICIT_DEF
%19:_(<4 x s32>) = G_BUILD_VECTOR %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32)
$q0 = COPY %19:_(<4 x s32>)
RET_ReallyLR implicit $q0
...
Original file line number Diff line number Diff line change
Expand Up @@ -521,11 +521,11 @@
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_FMINNUM (opcode {{[0-9]+}}): 1 type index
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FMAXNUM (opcode {{[0-9]+}}): 1 type index
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FMINNUM_IEEE (opcode {{[0-9]+}}): 1 type index
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
Expand Down

0 comments on commit e8e49a3

Please sign in to comment.