Skip to content

Commit

Permalink
DAG: Fix widening of fptrunc_round vectors (#89918)
Browse files: browse the repository at this point in the history
  • Loading branch information
arsenm authored Apr 24, 2024
1 parent 21ef187 commit 50082d6
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 12 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -984,7 +984,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_XRINT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
SDValue WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N);
SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4242,7 +4242,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;

case ISD::IS_FPCLASS:
Res = WidenVecRes_IS_FPCLASS(N);
case ISD::FPTRUNC_ROUND:
Res = WidenVecRes_UnarySameEltsWithScalarArg(N);
break;

case ISD::FLDEXP:
Expand Down Expand Up @@ -5004,7 +5005,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
}

SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
/// Result and first source operand are different scalar types, but must have
/// the same number of elements. There is an additional control argument which
/// should be passed through unchanged.
SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) {
SDValue FpValue = N->getOperand(0);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
Expand Down
53 changes: 44 additions & 9 deletions llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
Original file line number Diff line number Diff line change
Expand Up @@ -266,16 +266,51 @@ define amdgpu_gs void @s_fptrunc_round_v2f32_to_v2f16_upward_multiple_calls(<2 x
ret void
}

; FIXME
; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
; ret <3 x half> %res
; }
; Truncates a <3 x float> argument to <3 x half> with llvm.fptrunc.round,
; passing the rounding mode as the metadata string "round.upward".
; The expected output for both selectors sets a mode-register field with
; s_setreg_imm32_b32 and then emits one v_cvt_f16_f32 per element, packing
; the first two halves into v0 and leaving the third in v1.
; NOTE(review): a 3-element vector is presumably here to exercise the
; SelectionDAG vector-widening path for FPTRUNC_ROUND -- confirm against
; the type legalizer.
define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_upward(<3 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_upward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 1), 1
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
; GISEL-NEXT: ; return to shader part epilog
%res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.upward")
ret <3 x half> %res
}

; define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
; %res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
; ret <3 x half> %res
; }
; Truncates a <3 x float> argument to <3 x half> with llvm.fptrunc.round,
; passing the rounding mode as the metadata string "round.downward".
; The expected output for both selectors sets a mode-register field with
; s_setreg_imm32_b32 (hwreg offset operand 3 here) and then emits one
; v_cvt_f16_f32 per element, packing the first two halves into v0 and
; leaving the third in v1.
; NOTE(review): a 3-element vector is presumably here to exercise the
; SelectionDAG vector-widening path for FPTRUNC_ROUND -- confirm against
; the type legalizer.
define amdgpu_gs <3 x half> @v_fptrunc_round_v3f32_to_v3f16_downward(<3 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
; SDAG-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: v_fptrunc_round_v3f32_to_v3f16_downward:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 3, 1), 1
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, v2
; GISEL-NEXT: ; return to shader part epilog
%res = call <3 x half> @llvm.fptrunc.round.v3f16.v3f32(<3 x float> %a, metadata !"round.downward")
ret <3 x half> %res
}

define amdgpu_gs <4 x half> @v_fptrunc_round_v4f32_to_v4f16_upward(<4 x float> %a) {
; SDAG-LABEL: v_fptrunc_round_v4f32_to_v4f16_upward:
Expand Down

0 comments on commit 50082d6

Please sign in to comment.