diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index c8e55fe0f57993..0a44670de76e76 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1535,6 +1535,16 @@ def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; +def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] +>; + +def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] +>; + // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id) // // bar_val is the total number of waves that will wait on this diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 2e8cb907df5bc4..55e03ae35ce046 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -279,11 +279,18 @@ def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] >; -def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] +// mulhi24 yields the high-order 16 bits of the 48-bit result. Here's an example +// that shows mulhi24 is not associative: +// +// Given a = 0x10002, b = c = 0xffffff: +// mulhi24(mulhi24(a, b), c) = mulhi24(0x100, 0xffffff) = 0 +// Which is not equal to: +// mulhi24(a, mulhi24(b, c)) = mulhi24(0x10002, 0xffff) = 1 +def AMDGPUmulhi_u24_impl : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp, + [SDNPCommutative] >; -def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp, - [SDNPCommutative, SDNPAssociative] +def AMDGPUmulhi_i24_impl : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp, + [SDNPCommutative] >; def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, @@ -435,6 +442,14 @@ def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_mul_i24 node:$src0, node:$src1), (AMDGPUmul_i24_impl node:$src0, node:$src1)]>; +def AMDGPUmulhi_u24 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_mulhi_u24 node:$src0, node:$src1), + (AMDGPUmulhi_u24_impl node:$src0, node:$src1)]>; + +def AMDGPUmulhi_i24 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_mulhi_i24 node:$src0, node:$src1), + (AMDGPUmulhi_i24_impl node:$src0, node:$src1)]>; + def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [(int_amdgcn_sbfe node:$src0, node:$src1, node:$src2), (AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index db2c95d0dfb6b3..430a97e6339e6a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4081,6 +4081,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_mbcnt_hi: case Intrinsic::amdgcn_mul_u24: case Intrinsic::amdgcn_mul_i24: + case Intrinsic::amdgcn_mulhi_u24: + case Intrinsic::amdgcn_mulhi_i24: case Intrinsic::amdgcn_lerp: case Intrinsic::amdgcn_sad_u8: case Intrinsic::amdgcn_msad_u8: diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index ebb0d759dd2254..1d12a59a4bc716 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -492,9 +492,9 @@ defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>; defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>; -defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN, AMDGPUmulhi_i24>; +defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>; defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>; -defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN, AMDGPUmulhi_u24>; +defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>; defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>; defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN, smin>; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir new file mode 100644 index 00000000000000..f9ec4fa7bba419 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.i24.mir @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: mulhi_i24_vsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: mulhi_i24_vsv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: mulhi_i24_vvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: mulhi_i24_vvs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: mulhi_i24_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: mulhi_i24_vvv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[V_MUL_HI_I32_I24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_I32_I24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_I32_I24_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.i24), %0, %1 + S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir new file mode 100644 index 00000000000000..7d4a269d362e6d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.mulhi.u24.mir @@ -0,0 +1,68 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: mulhi_u24_vsv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: mulhi_u24_vsv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = COPY $vgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: mulhi_u24_vvs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: mulhi_u24_vvs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr(s32) = COPY $sgpr0 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1 + S_ENDPGM 0, implicit %2 +... + +--- +name: mulhi_u24_vvv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: mulhi_u24_vvv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[V_MUL_HI_U32_U24_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_U24_e64 [[COPY]], [[COPY1]], implicit $exec + ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_MUL_HI_U32_U24_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = COPY $vgpr1 + %2:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.mulhi.u24), %0, %1 + S_ENDPGM 0, implicit %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll new file mode 100644 index 00000000000000..dc9698d19d7650 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +define i32 @basic(i32 %a, i32 %b) { +; CHECK-LABEL: basic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mul_hi_i32_i24_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b) + ret i32 %mul +} + +declare i32 @llvm.amdgcn.mulhi.i24(i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll new file mode 100644 index 00000000000000..92120b61dca1b4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +define i32 @basic(i32 %a, i32 %b) { +; CHECK-LABEL: basic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mul_hi_u32_u24_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b) + ret i32 %mul +} + +declare i32 @llvm.amdgcn.mulhi.u24(i32, i32)