Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/docs/GlobalISel/GenericOpcode.rst
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,12 @@ G_FPTRUNC

Convert a floating point value to a narrower type.

G_FPTRUNC_ODD
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You should just use G_INTRINSIC_FPTRUNC_ROUND instead of introducing this

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. That is a new one to me and looks like an AMD-ism. We don't really have the instructions to easily support that for any other rounding modes, and it doesn't even support odd rounding modes yet. Considering we don't have a great way to conditionally legalize intrinsics like that, a separate instruction sounds like a better approach for us. (We can always change that in the future if needed).

^^^^^^^^^^^^^

Convert a floating point value to a narrower type using round-to-odd rounding
mode.

G_FPTOSI, G_FPTOUI, G_SITOFP, G_UITOFP
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
15 changes: 15 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1332,6 +1332,21 @@ class LLVM_ABI MachineIRBuilder {
buildFPTrunc(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert \p Res = G_FPTRUNC_ODD \p Op
///
/// G_FPTRUNC_ODD converts a floating-point value into one with a narrower
/// floating-point type using the round-to-odd rounding mode.
///
/// \pre setBasicBlock or setMI must have been called.
/// \pre \p Res must be a generic virtual register with scalar or vector type.
/// \pre \p Op must be a generic virtual register with scalar or vector type.
/// \pre \p Res must be smaller than \p Op.
///
/// \param Flags Optional flags handed on to the instruction builder; defaults
///        to std::nullopt.
///
/// \return The newly created instruction.
MachineInstrBuilder
buildFPTruncOdd(const DstOp &Res, const SrcOp &Op,
std::optional<unsigned> Flags = std::nullopt);

/// Build and insert \p Res = G_TRUNC \p Op
///
/// G_TRUNC extracts the low bits of a type. For a vector type each element is
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,9 @@ HANDLE_TARGET_OPCODE(G_FPEXT)
/// Generic floating-point truncation
HANDLE_TARGET_OPCODE(G_FPTRUNC)

/// Generic floating-point truncation using round to odd
HANDLE_TARGET_OPCODE(G_FPTRUNC_ODD)

/// Generic float to signed-int conversion
HANDLE_TARGET_OPCODE(G_FPTOSI)

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,12 @@ def G_FPTRUNC : GenericInstruction {
let hasSideEffects = false;
}

// Generic floating-point truncation using the round-to-odd rounding mode:
// converts $src to the narrower floating-point type of $dst.
def G_FPTRUNC_ODD : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
let hasSideEffects = false;
}

def G_FPTOSI : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
Expand Down
28 changes: 25 additions & 3 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5595,6 +5595,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_ANYEXT:
case G_FPEXT:
case G_FPTRUNC:
case G_FPTRUNC_ODD:
case G_SITOFP:
case G_UITOFP:
case G_FPTOSI:
Expand Down Expand Up @@ -8476,7 +8477,8 @@ LegalizerHelper::lowerFPTOINT_SAT(MachineInstr &MI) {
return Legalized;
}

// f64 -> f16 conversion using round-to-nearest-even rounding mode.
// f64 -> f16 conversion using round-to-nearest-even rounding mode for scalars
// and round-to-odd for vectors.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
const LLT S1 = LLT::scalar(1);
Expand All @@ -8486,8 +8488,28 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
MRI.getType(Src).getScalarType() == LLT::scalar(64));

if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
if (MRI.getType(Src).isVector()) {
LLT SrcTy = MRI.getType(Src);

LLT MidTy = LLT::fixed_vector(SrcTy.getNumElements(), LLT::scalar(32));

// Check if G_FPTRUNC_ODD has been added to the legalizer and the resultant
// types can be legalized.
auto LegalizeAction =
LI.getAction({TargetOpcode::G_FPTRUNC_ODD, {MidTy, SrcTy}}).Action;

if (LegalizeAction == LegalizeActions::Unsupported ||
LegalizeAction == LegalizeActions::NotFound)
return UnableToLegalize;

MIRBuilder.setInstrAndDebugLoc(MI);

MachineInstrBuilder Mid = MIRBuilder.buildFPTruncOdd(MidTy, Src);
MIRBuilder.buildFPTrunc(Dst, Mid.getReg(0));

MI.eraseFromParent();
return Legalized;
}

if (MI.getFlag(MachineInstr::FmAfn)) {
unsigned Flags = MI.getFlags();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,12 @@ MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op,
return buildInstr(TargetOpcode::G_FPTRUNC, Res, Op, Flags);
}

// Emit a G_FPTRUNC_ODD instruction that defines Res from Op. Flags is handed
// to buildInstr unchanged.
MachineInstrBuilder
MachineIRBuilder::buildFPTruncOdd(const DstOp &Res, const SrcOp &Op,
                                  std::optional<unsigned> Flags) {
  constexpr unsigned Opcode = TargetOpcode::G_FPTRUNC_ODD;
  return buildInstr(Opcode, Res, Op, Flags);
}

MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
const DstOp &Res,
const SrcOp &Op0,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,8 @@ def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;

def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;

def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>;

// These are patterns that we only use for GlobalISel via the importer.
def : Pat<(f32 (fadd (vector_extract (v2f32 FPR64:$Rn), (i64 0)),
(vector_extract (v2f32 FPR64:$Rn), (i64 1)))),
Expand Down
19 changes: 17 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DerivedTypes.h"
Expand Down Expand Up @@ -817,10 +818,24 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor(
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
.libcallFor({{s16, s128}, {s32, s128}, {s64, s128}})
.clampNumElements(0, v4s16, v4s16)
.clampNumElements(0, v2s32, v2s32)
.moreElementsToNextPow2(1)
.lowerIf([](const LegalityQuery &Q) {
LLT DstTy = Q.Types[0];
LLT SrcTy = Q.Types[1];
return SrcTy.isFixedVector() && DstTy.isFixedVector() &&
SrcTy.getScalarSizeInBits() == 64 &&
DstTy.getScalarSizeInBits() == 16;
})
// Clamp based on input
.clampNumElements(1, v4s32, v4s32)
.clampNumElements(1, v2s64, v2s64)
.scalarize(0);

getActionDefinitionsBuilder(G_FPTRUNC_ODD)
.legalFor({{s16, s32}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
.clampMaxNumElements(1, s32, 4)
.clampMaxNumElements(1, s64, 2);

getActionDefinitionsBuilder(G_FPEXT)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
bool legalizeDynStackAlloc(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizePrefetch(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeBitcast(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeFptrunc(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI) const;
const AArch64Subtarget *ST;
};
} // End llvm namespace.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -558,8 +558,11 @@
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FPTRUNC (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: G_FPTRUNC_ODD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
# DEBUG-NEXT: G_FPTOSI (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AArch64/arm64-fp128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1197,30 +1197,22 @@ define <2 x half> @vec_round_f16(<2 x fp128> %val) {
;
; CHECK-GI-LABEL: vec_round_f16:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: sub sp, sp, #64
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
; CHECK-GI-NEXT: sub sp, sp, #48
; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: mov v2.d[0], x8
; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: mov v2.d[1], x8
; CHECK-GI-NEXT: str q2, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-GI-NEXT: // kill: def $h0 killed $h0 def $q0
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT: bl __trunctfhf2
; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: add sp, sp, #64
; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-GI-NEXT: mov v1.h[1], v0.h[0]
; CHECK-GI-NEXT: fmov d0, d1
; CHECK-GI-NEXT: add sp, sp, #48
; CHECK-GI-NEXT: ret
%dst = fptrunc <2 x fp128> %val to <2 x half>
ret <2 x half> %dst
Expand Down
47 changes: 6 additions & 41 deletions llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -170,47 +170,12 @@ define <4 x half> @s_to_h(<4 x float> %a) {
}

define <4 x half> @d_to_h(<4 x double> %a) {
; CHECK-CVT-SD-LABEL: d_to_h:
; CHECK-CVT-SD: // %bb.0:
; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: d_to_h:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: d_to_h:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov d2, v0.d[1]
; CHECK-CVT-GI-NEXT: fcvt h0, d0
; CHECK-CVT-GI-NEXT: mov d3, v1.d[1]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: fcvt h2, d2
; CHECK-CVT-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-CVT-GI-NEXT: fcvt h2, d3
; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-CVT-GI-NEXT: mov v0.h[3], v2.h[0]
; CHECK-CVT-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: d_to_h:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov d2, v0.d[1]
; CHECK-FP16-GI-NEXT: fcvt h0, d0
; CHECK-FP16-GI-NEXT: mov d3, v1.d[1]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: fcvt h2, d2
; CHECK-FP16-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-FP16-GI-NEXT: fcvt h2, d3
; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-FP16-GI-NEXT: mov v0.h[3], v2.h[0]
; CHECK-FP16-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: d_to_h:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: ret
%1 = fptrunc <4 x double> %a to <4 x half>
ret <4 x half> %1
}
Expand Down
74 changes: 9 additions & 65 deletions llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -176,71 +176,15 @@ define <8 x half> @s_to_h(<8 x float> %a) {
}

define <8 x half> @d_to_h(<8 x double> %a) {
; CHECK-CVT-SD-LABEL: d_to_h:
; CHECK-CVT-SD: // %bb.0:
; CHECK-CVT-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-CVT-SD-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-CVT-SD-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-CVT-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-CVT-SD-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-CVT-SD-NEXT: ret
;
; CHECK-FP16-SD-LABEL: d_to_h:
; CHECK-FP16-SD: // %bb.0:
; CHECK-FP16-SD-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-FP16-SD-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-FP16-SD-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-FP16-SD-NEXT: fcvtn v0.4h, v0.4s
; CHECK-FP16-SD-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-FP16-SD-NEXT: ret
;
; CHECK-CVT-GI-LABEL: d_to_h:
; CHECK-CVT-GI: // %bb.0:
; CHECK-CVT-GI-NEXT: mov d4, v0.d[1]
; CHECK-CVT-GI-NEXT: fcvt h0, d0
; CHECK-CVT-GI-NEXT: mov d5, v1.d[1]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: fcvt h4, d4
; CHECK-CVT-GI-NEXT: mov v0.h[1], v4.h[0]
; CHECK-CVT-GI-NEXT: fcvt h4, d5
; CHECK-CVT-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-CVT-GI-NEXT: mov d1, v2.d[1]
; CHECK-CVT-GI-NEXT: fcvt h2, d2
; CHECK-CVT-GI-NEXT: mov v0.h[3], v4.h[0]
; CHECK-CVT-GI-NEXT: fcvt h1, d1
; CHECK-CVT-GI-NEXT: mov v0.h[4], v2.h[0]
; CHECK-CVT-GI-NEXT: mov d2, v3.d[1]
; CHECK-CVT-GI-NEXT: fcvt h3, d3
; CHECK-CVT-GI-NEXT: mov v0.h[5], v1.h[0]
; CHECK-CVT-GI-NEXT: fcvt h1, d2
; CHECK-CVT-GI-NEXT: mov v0.h[6], v3.h[0]
; CHECK-CVT-GI-NEXT: mov v0.h[7], v1.h[0]
; CHECK-CVT-GI-NEXT: ret
;
; CHECK-FP16-GI-LABEL: d_to_h:
; CHECK-FP16-GI: // %bb.0:
; CHECK-FP16-GI-NEXT: mov d4, v0.d[1]
; CHECK-FP16-GI-NEXT: fcvt h0, d0
; CHECK-FP16-GI-NEXT: mov d5, v1.d[1]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: fcvt h4, d4
; CHECK-FP16-GI-NEXT: mov v0.h[1], v4.h[0]
; CHECK-FP16-GI-NEXT: fcvt h4, d5
; CHECK-FP16-GI-NEXT: mov v0.h[2], v1.h[0]
; CHECK-FP16-GI-NEXT: mov d1, v2.d[1]
; CHECK-FP16-GI-NEXT: fcvt h2, d2
; CHECK-FP16-GI-NEXT: mov v0.h[3], v4.h[0]
; CHECK-FP16-GI-NEXT: fcvt h1, d1
; CHECK-FP16-GI-NEXT: mov v0.h[4], v2.h[0]
; CHECK-FP16-GI-NEXT: mov d2, v3.d[1]
; CHECK-FP16-GI-NEXT: fcvt h3, d3
; CHECK-FP16-GI-NEXT: mov v0.h[5], v1.h[0]
; CHECK-FP16-GI-NEXT: fcvt h1, d2
; CHECK-FP16-GI-NEXT: mov v0.h[6], v3.h[0]
; CHECK-FP16-GI-NEXT: mov v0.h[7], v1.h[0]
; CHECK-FP16-GI-NEXT: ret
; CHECK-LABEL: d_to_h:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtxn v0.2s, v0.2d
; CHECK-NEXT: fcvtxn v2.2s, v2.2d
; CHECK-NEXT: fcvtxn2 v0.4s, v1.2d
; CHECK-NEXT: fcvtxn2 v2.4s, v3.2d
; CHECK-NEXT: fcvtn v0.4h, v0.4s
; CHECK-NEXT: fcvtn2 v0.8h, v2.4s
; CHECK-NEXT: ret
%1 = fptrunc <8 x double> %a to <8 x half>
ret <8 x half> %1
}
Expand Down
Loading