-
Notifications
You must be signed in to change notification settings - Fork 14.5k
DAG: Fix assert when legalizing v3f16 ldexp #97098
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
11d7abe
to
ac3dbbf
Compare
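@llvm/pr-subscribers-llvm-selectiondag — Author: Matt Arsenault (arsenm). Changes: For the v3f16.v3i32 case, the v3f16 would request widening; the patch below stops assuming a vector exponent operand is widened as well, calling GetWidenedVector on it only when its type action is TypeWidenVector and using ModifyToType otherwise. Fixes: SWDEV-470951. Patch is 26.08 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/97098.diff — 2 Files Affected: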
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 532c6306fb3d1..4d748a46158d1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5231,7 +5231,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- SDValue ExpOp = RHS.getValueType().isVector() ? GetWidenedVector(RHS) : RHS;
+ EVT ExpVT = RHS.getValueType();
+ SDValue ExpOp = RHS;
+ if (ExpVT.isVector()) {
+ if (getTypeAction(ExpVT) == TargetLowering::TypeWidenVector)
+ ExpOp = GetWidenedVector(RHS);
+ else {
+ EVT WideExpVT =
+ WidenVT.changeVectorElementType(ExpVT.getVectorElementType());
+ ExpOp = ModifyToType(RHS, WideExpVT);
+ }
+ }
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index dc7fe840f5118..b2b5153bb6c2a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -565,6 +565,507 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
ret <2 x half> %result
}
+define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v4
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v5
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX8-SDAG-NEXT: v_med3_i32 v2, v2, s4, v5
+; GFX8-SDAG-NEXT: v_med3_i32 v3, v3, s4, v5
+; GFX8-SDAG-NEXT: v_med3_i32 v4, v4, s4, v5
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX9-SDAG-NEXT: v_med3_i32 v3, v3, s4, v5
+; GFX9-SDAG-NEXT: v_med3_i32 v2, v2, s4, v5
+; GFX9-SDAG-NEXT: v_med3_i32 v4, v4, s4, v5
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v5, v3
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v4
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v5
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0xffff8000
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX8-GISEL-NEXT: v_med3_i32 v2, v2, v5, v6
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v3, v5, v6
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v4, v5, v6
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v5, v6
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v3, v5, v6
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v4, v5, v6
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
+; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v6, v3
+; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b)
+ ret <3 x half> %result
+}
+
+define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
+; GFX6-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v4
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v5
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v5, v4
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-GISEL-NEXT: v_bfe_i32 v3, v3, 0, 16
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-GISEL-NEXT: v_bfe_i32 v3, v4, 0, 16
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; GFX6-GISEL-NEXT: v_bfe_i32 v3, v5, 0, 16
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v4, v0, v2
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v5
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> %a, <3 x i16> %b)
+ ret <3 x half> %result
+}
+
+define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v4
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v5
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v3, v3, v7
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX8-SDAG-NEXT: v_med3_i32 v4, v4, s4, v6
+; GFX8-SDAG-NEXT: v_med3_i32 v5, v5, s4, v6
+; GFX8-SDAG-NEXT: v_med3_i32 v2, v2, s4, v6
+; GFX8-SDAG-NEXT: v_med3_i32 v3, v3, s4, v6
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v4, v1, v4
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX9-SDAG-NEXT: v_med3_i32 v5, v5, s4, v6
+; GFX9-SDAG-NEXT: v_med3_i32 v4, v4, s4, v6
+; GFX9-SDAG-NEXT: v_med3_i32 v3, v3, s4, v6
+; GFX9-SDAG-NEXT: v_med3_i32 v2, v2, s4, v6
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-SDAG-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v5, v6, v5
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v7, v3
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v4
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v5
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v6
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v3, v3, v7
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0xffff8000
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v7, 0x7fff
+; GFX8-GISEL-NEXT: v_med3_i32 v2, v2, v6, v7
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v3, v6, v7
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v4, v6, v7
+; GFX8-GISEL-NEXT: v_med3_i32 v4, v5, v6, v7
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v3, v1, v3
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v6, v7
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v3, v6, v7
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v4, v6, v7
+; GFX9-GISEL-NEXT: v_med3_i32 v4, v5, v6, v7
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v3, v1, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v6
+; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v4, v6
+; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v6
+; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v6
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v7, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v8, v5
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b)
+ ret <4 x half> %result
+}
+
+define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v4f16_v4i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; ...
[truncated]
|
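@llvm/pr-subscribers-backend-amdgpu — Author: Matt Arsenault (arsenm). Changes: For the v3f16.v3i32 case, the v3f16 would request widening; the patch below stops assuming a vector exponent operand is widened as well, calling GetWidenedVector on it only when its type action is TypeWidenVector and using ModifyToType otherwise. Fixes: SWDEV-470951. Patch is 26.08 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/97098.diff — 2 Files Affected: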
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 532c6306fb3d1..4d748a46158d1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5231,7 +5231,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
SDValue RHS = N->getOperand(1);
- SDValue ExpOp = RHS.getValueType().isVector() ? GetWidenedVector(RHS) : RHS;
+ EVT ExpVT = RHS.getValueType();
+ SDValue ExpOp = RHS;
+ if (ExpVT.isVector()) {
+ if (getTypeAction(ExpVT) == TargetLowering::TypeWidenVector)
+ ExpOp = GetWidenedVector(RHS);
+ else {
+ EVT WideExpVT =
+ WidenVT.changeVectorElementType(ExpVT.getVectorElementType());
+ ExpOp = ModifyToType(RHS, WideExpVT);
+ }
+ }
return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index dc7fe840f5118..b2b5153bb6c2a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -565,6 +565,507 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
ret <2 x half> %result
}
+define <3 x half> @test_ldexp_v3f16_v3i32(<3 x half> %a, <3 x i32> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v4
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v5
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX8-SDAG-NEXT: v_med3_i32 v2, v2, s4, v5
+; GFX8-SDAG-NEXT: v_med3_i32 v3, v3, s4, v5
+; GFX8-SDAG-NEXT: v_med3_i32 v4, v4, s4, v5
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX9-SDAG-NEXT: v_med3_i32 v3, v3, s4, v5
+; GFX9-SDAG-NEXT: v_med3_i32 v2, v2, s4, v5
+; GFX9-SDAG-NEXT: v_med3_i32 v4, v4, s4, v5
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v5, v3
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-NEXT: v_med3_i32 v2, v4, s0, 0x7fff
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v4
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v5
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 0xffff8000
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX8-GISEL-NEXT: v_med3_i32 v2, v2, v5, v6
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v3, v5, v6
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v4, v5, v6
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v5, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v5, v6
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v3, v5, v6
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v4, v5, v6
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v5, 0x7fff
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v5
+; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v5
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v6, v3
+; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v4, v5
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <3 x half> @llvm.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b)
+ ret <3 x half> %result
+}
+
+define <3 x half> @test_ldexp_v3f16_v3i16(<3 x half> %a, <3 x i16> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_bfe_i32 v5, v5, 0, 16
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-SDAG-NEXT: v_bfe_i32 v4, v4, 0, 16
+; GFX6-SDAG-NEXT: v_bfe_i32 v3, v3, 0, 16
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v4
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v5
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v2, v5, v4
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-GISEL-NEXT: v_bfe_i32 v3, v3, 0, 16
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v3
+; GFX6-GISEL-NEXT: v_bfe_i32 v3, v4, 0, 16
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v3
+; GFX6-GISEL-NEXT: v_bfe_i32 v3, v5, 0, 16
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v3
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v4, v0, v2
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v4, v0, v2
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v4
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v4, v5
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <3 x half> @llvm.ldexp.v3f16.v3i16(<3 x half> %a, <3 x i16> %b)
+ ret <3 x half> %result
+}
+
+define <4 x half> @test_ldexp_v4f16_v4i32(<4 x half> %a, <4 x i32> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v0, v0, v4
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v1, v1, v5
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v2, v2, v6
+; GFX6-SDAG-NEXT: v_ldexp_f32_e32 v3, v3, v7
+; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX8-SDAG: ; %bb.0:
+; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX8-SDAG-NEXT: v_med3_i32 v4, v4, s4, v6
+; GFX8-SDAG-NEXT: v_med3_i32 v5, v5, s4, v6
+; GFX8-SDAG-NEXT: v_med3_i32 v2, v2, s4, v6
+; GFX8-SDAG-NEXT: v_med3_i32 v3, v3, s4, v6
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v4, v1, v4
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-SDAG-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v4, v1
+; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x8000
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX9-SDAG-NEXT: v_med3_i32 v5, v5, s4, v6
+; GFX9-SDAG-NEXT: v_med3_i32 v4, v4, s4, v6
+; GFX9-SDAG-NEXT: v_med3_i32 v3, v3, s4, v6
+; GFX9-SDAG-NEXT: v_med3_i32 v2, v2, s4, v6
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX9-SDAG-NEXT: v_ldexp_f16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x8000
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1
+; GFX11-SDAG-NEXT: v_med3_i32 v5, v5, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_med3_i32 v3, v3, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-SDAG-NEXT: v_med3_i32 v2, v2, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_med3_i32 v4, v4, s0, 0x7fff
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v5, v6, v5
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v3, v7, v3
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-SDAG-NEXT: v_pack_b32_f16 v1, v1, v5
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX6-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX6-GISEL: ; %bb.0:
+; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v3, v3
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v0, v0, v4
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v1, v1, v5
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v2, v2, v6
+; GFX6-GISEL-NEXT: v_ldexp_f32_e32 v3, v3, v7
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX8-GISEL: ; %bb.0:
+; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 0xffff8000
+; GFX8-GISEL-NEXT: v_mov_b32_e32 v7, 0x7fff
+; GFX8-GISEL-NEXT: v_med3_i32 v2, v2, v6, v7
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v3, v6, v7
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_med3_i32 v3, v4, v6, v7
+; GFX8-GISEL-NEXT: v_med3_i32 v4, v5, v6, v7
+; GFX8-GISEL-NEXT: v_ldexp_f16_e32 v3, v1, v3
+; GFX8-GISEL-NEXT: v_ldexp_f16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v6, 0xffff8000
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v7, 0x7fff
+; GFX9-GISEL-NEXT: v_med3_i32 v2, v2, v6, v7
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v3, v6, v7
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v2, v0, v2
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_med3_i32 v3, v4, v6, v7
+; GFX9-GISEL-NEXT: v_med3_i32 v4, v5, v6, v7
+; GFX9-GISEL-NEXT: v_ldexp_f16_e32 v3, v1, v3
+; GFX9-GISEL-NEXT: v_ldexp_f16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
+; GFX9-GISEL-NEXT: v_lshl_or_b32 v1, v1, 16, v3
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v6, 0x7fff
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v0
+; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_med3_i32 v2, 0xffff8000, v2, v6
+; GFX11-GISEL-NEXT: v_med3_i32 v4, 0xffff8000, v4, v6
+; GFX11-GISEL-NEXT: v_med3_i32 v3, 0xffff8000, v3, v6
+; GFX11-GISEL-NEXT: v_med3_i32 v5, 0xffff8000, v5, v6
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v0, v0, v2
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v1, v1, v4
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v2, v7, v3
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX11-GISEL-NEXT: v_ldexp_f16_e32 v3, v8, v5
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %result = call <4 x half> @llvm.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b)
+ ret <4 x half> %result
+}
+
+define <4 x half> @test_ldexp_v4f16_v4i16(<4 x half> %a, <4 x i16> %b) {
+; GFX6-SDAG-LABEL: test_ldexp_v4f16_v4i16:
+; GFX6-SDAG: ; %bb.0:
+; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v3, v3
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v2, v2
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1
+; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v3
+; ...
[truncated]
|
For the v3f16.v3i32 case, the v3f16 result type requests widening to v4f16, but the v3i32 exponent operand needs no widening to be a legal type, so it has no widened vector and calling GetWidenedVector on it would assert. Instead, widen the exponent vector to the same element count as the widened result. Fixes: SWDEV-470951
ac3dbbf
to
52bfee3
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
For the v3f16.v3i32 case, the v3f16 result type requests widening to v4f16, but the v3i32 exponent operand needs no widening to be a legal type, so it has no widened vector and calling GetWidenedVector on it would assert. Instead, widen the exponent vector to the same element count as the widened result. Fixes: SWDEV-470951
For the v3f16.v3i32 case, the v3f16 result type requests widening to v4f16, but the v3i32 exponent operand needs no widening to be a legal type, so it has no widened vector and calling GetWidenedVector on it would assert. Instead, widen the exponent vector to the same element count as the widened result. Fixes: SWDEV-470951 (cherry picked from commit 76bc071) Change-Id: I7ac758a0e4ad70abca28b9413181f94cef549b00
For the v3f16.v3i32 case, the v3f16 result type requests widening
to v4f16, but the v3i32 exponent operand needs no widening to be a
legal type, so it has no widened vector and calling GetWidenedVector
on it would assert. Instead, widen the exponent vector to the same
element count as the widened result.
Fixes: SWDEV-470951