diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 49abb1e008900a..1632362109fd23 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2897,8 +2897,22 @@ static SDValue simplifyMul24(SDNode *Node24,
   unsigned NewOpcode = Node24->getOpcode();
   if (IsIntrin) {
     unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
-    NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ?
-      AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+    switch (IID) {
+    case Intrinsic::amdgcn_mul_i24:
+      NewOpcode = AMDGPUISD::MUL_I24;
+      break;
+    case Intrinsic::amdgcn_mul_u24:
+      NewOpcode = AMDGPUISD::MUL_U24;
+      break;
+    case Intrinsic::amdgcn_mulhi_i24:
+      NewOpcode = AMDGPUISD::MULHI_I24;
+      break;
+    case Intrinsic::amdgcn_mulhi_u24:
+      NewOpcode = AMDGPUISD::MULHI_U24;
+      break;
+    default:
+      llvm_unreachable("Expected 24-bit mul intrinsic");
+    }
   }
 
   APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
@@ -3107,6 +3121,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
   switch (IID) {
   case Intrinsic::amdgcn_mul_i24:
   case Intrinsic::amdgcn_mul_u24:
+  case Intrinsic::amdgcn_mulhi_i24:
+  case Intrinsic::amdgcn_mulhi_u24:
     return simplifyMul24(N, DCI);
   case Intrinsic::amdgcn_fract:
   case Intrinsic::amdgcn_rsq:
diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
index 7c15e731a55539..eaa45b929b2b1d 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
@@ -575,11 +575,9 @@ define i64 @test_umul48_i64(i64 %lhs, i64 %rhs) {
 ; GCN-LABEL: test_umul48_i64:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    s_mov_b32 s4, 0xffffff
-; GCN-NEXT:    v_and_b32_e32 v1, s4, v0
-; GCN-NEXT:    v_and_b32_e32 v3, s4, v2
-; GCN-NEXT:    v_mul_u32_u24_e32 v0, v0, v2
-; GCN-NEXT:    v_mul_hi_u32_u24_e32 v1, v1, v3
+; GCN-NEXT:    v_mul_u32_u24_e32 v3, v0, v2
+; GCN-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v2
+; GCN-NEXT:    v_mov_b32_e32 v0, v3
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %lhs24 = and i64 %lhs, 16777215
   %rhs24 = and i64 %rhs, 16777215
@@ -588,49 +586,16 @@ define i64 @test_umul48_i64(i64 %lhs, i64 %rhs) {
 }
 
 define <2 x i64> @test_umul48_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
-; SI-LABEL: test_umul48_v2i64:
-; SI:       ; %bb.0:
-; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SI-NEXT:    s_mov_b32 s4, 0xffffff
-; SI-NEXT:    v_mul_u32_u24_e32 v5, v0, v4
-; SI-NEXT:    v_mul_u32_u24_e32 v7, v2, v6
-; SI-NEXT:    v_and_b32_e32 v2, s4, v2
-; SI-NEXT:    v_and_b32_e32 v0, s4, v0
-; SI-NEXT:    v_and_b32_e32 v3, s4, v6
-; SI-NEXT:    v_and_b32_e32 v1, s4, v4
-; SI-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v1
-; SI-NEXT:    v_mul_hi_u32_u24_e32 v3, v2, v3
-; SI-NEXT:    v_mov_b32_e32 v0, v5
-; SI-NEXT:    v_mov_b32_e32 v2, v7
-; SI-NEXT:    s_setpc_b64 s[30:31]
-;
-; VI-LABEL: test_umul48_v2i64:
-; VI:       ; %bb.0:
-; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NEXT:    s_mov_b32 s4, 0xffffff
-; VI-NEXT:    v_and_b32_e32 v3, s4, v2
-; VI-NEXT:    v_and_b32_e32 v1, s4, v0
-; VI-NEXT:    v_and_b32_e32 v5, s4, v6
-; VI-NEXT:    v_and_b32_e32 v7, s4, v4
-; VI-NEXT:    v_mul_u32_u24_e32 v0, v0, v4
-; VI-NEXT:    v_mul_hi_u32_u24_e32 v1, v1, v7
-; VI-NEXT:    v_mul_u32_u24_e32 v2, v2, v6
-; VI-NEXT:    v_mul_hi_u32_u24_e32 v3, v3, v5
-; VI-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-LABEL: test_umul48_v2i64:
-; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    s_mov_b32 s4, 0xffffff
-; GFX9-NEXT:    v_and_b32_e32 v3, s4, v2
-; GFX9-NEXT:    v_and_b32_e32 v1, s4, v0
-; GFX9-NEXT:    v_and_b32_e32 v5, s4, v6
-; GFX9-NEXT:    v_and_b32_e32 v7, s4, v4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v0, v0, v4
-; GFX9-NEXT:    v_mul_hi_u32_u24_e32 v1, v1, v7
-; GFX9-NEXT:    v_mul_u32_u24_e32 v2, v2, v6
-; GFX9-NEXT:    v_mul_hi_u32_u24_e32 v3, v3, v5
-; GFX9-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: test_umul48_v2i64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_mul_u32_u24_e32 v5, v0, v4
+; GCN-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v4
+; GCN-NEXT:    v_mul_u32_u24_e32 v4, v2, v6
+; GCN-NEXT:    v_mul_hi_u32_u24_e32 v3, v2, v6
+; GCN-NEXT:    v_mov_b32_e32 v0, v5
+; GCN-NEXT:    v_mov_b32_e32 v2, v4
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %lhs24 = and <2 x i64> %lhs, <i64 16777215, i64 16777215>
   %rhs24 = and <2 x i64> %rhs, <i64 16777215, i64 16777215>
   %mul = mul <2 x i64> %lhs24, %rhs24