Skip to content

Commit

Permalink
[AMDGPU] Don't create mulhi_24 in CGP (#72983)
Browse files Browse the repository at this point in the history
Instead, create a mul24 with a 64 bit result and let ISel take care of
it.

This allows patterns to simply match mul24 even for 64-bit muls instead of having to match both mul/mulhi and a buildvector/bitconvert/etc.
  • Loading branch information
Pierre-vh committed Nov 30, 2023
1 parent c0b9269 commit 8a66510
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 243 deletions.
6 changes: 4 additions & 2 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -1998,12 +1998,14 @@ def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">,
[IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
// mul24 intrinsics can return i32 or i64.
// When returning i64, they're lowered to a mul24/mulhi24 pair.
def int_amdgcn_mul_i24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_i32_ty],
def int_amdgcn_mul_u24 : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
>;
Expand Down
60 changes: 13 additions & 47 deletions llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,34 +624,6 @@ static Value *insertValues(IRBuilder<> &Builder,
return NewVal;
}

// Builds a multiply of `LHS` and `RHS` out of the amdgcn mul24 intrinsics.
//
// `Size` is the width of the original destination and `NumBits` is the number
// of known bits of the result. When either is at most 32, a single
// mul_i24/mul_u24 call is returned. Otherwise (result of up to 48 bits) the
// low half comes from mul_*24 and the high half from mulhi_*24; both are
// widened to i64 and merged as `Lo | (Hi << 32)`.
static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
                       unsigned Size, unsigned NumBits, bool IsSigned) {
  // The low-half intrinsic is needed on both paths, so pick it once.
  const Intrinsic::ID MulLoID =
      IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;

  const bool NeedsHighHalf = Size > 32 && NumBits > 32;
  if (!NeedsHighHalf)
    return Builder.CreateIntrinsic(MulLoID, {}, {LHS, RHS});

  assert(NumBits <= 48);

  const Intrinsic::ID MulHiID =
      IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;

  Value *LowHalf = Builder.CreateIntrinsic(MulLoID, {}, {LHS, RHS});
  Value *HighHalf = Builder.CreateIntrinsic(MulHiID, {}, {LHS, RHS});

  // Widen both halves to 64 bits before combining them.
  IntegerType *WideTy = Builder.getInt64Ty();
  Value *LowWide = Builder.CreateZExtOrTrunc(LowHalf, WideTy);
  Value *HighWide = Builder.CreateZExtOrTrunc(HighHalf, WideTy);

  // Place the high half in bits [63:32] and merge with the low half.
  Value *HighShifted = Builder.CreateShl(HighWide, 32);
  return Builder.CreateOr(LowWide, HighShifted);
}

bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)
return false;
Expand Down Expand Up @@ -691,26 +663,20 @@ bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
extractValues(Builder, RHSVals, RHS);

IntegerType *I32Ty = Builder.getInt32Ty();
for (int I = 0, E = LHSVals.size(); I != E; ++I) {
Value *LHS, *RHS;
if (IsSigned) {
LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
} else {
LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
}
IntegerType *IntrinTy = Size > 32 ? Builder.getInt64Ty() : I32Ty;
Type *DstTy = LHSVals[0]->getType();

Value *Result =
getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned);

if (IsSigned) {
ResultVals.push_back(
Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType()));
} else {
ResultVals.push_back(
Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType()));
}
for (int I = 0, E = LHSVals.size(); I != E; ++I) {
Value *LHS = IsSigned ? Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty)
: Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
Value *RHS = IsSigned ? Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty)
: Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
Intrinsic::ID ID =
IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
Value *Result = Builder.CreateIntrinsic(ID, {IntrinTy}, {LHS, RHS});
Result = IsSigned ? Builder.CreateSExtOrTrunc(Result, DstTy)
: Builder.CreateZExtOrTrunc(Result, DstTy);
ResultVals.push_back(Result);
}

Value *NewVal = insertValues(Builder, Ty, ResultVals);
Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,15 @@ def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;

// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
// when performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
// when performing the multiply. The result is a 32 or 64 bit value.
// AMDGPUMul24Op: type profile with one result and two operands. The result
// and the first operand must be integers, and both operands share one type.
// The result type is not tied to the operand type.
def AMDGPUMul24Op : SDTypeProfile<1, 2, [
SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 2>
]>;

def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", AMDGPUMul24Op,
[SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", AMDGPUMul24Op,
[SDNPCommutative, SDNPAssociative]
>;

Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/VOP2Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,17 @@ def : divergent_i64_BinOp <and, V_AND_B32_e64>;
def : divergent_i64_BinOp <or, V_OR_B32_e64>;
def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;

// mul24 w/ 64 bit output.
// Matches `Op` applied to two i32 sources and yielding an i64, and selects it
// to a REG_SEQUENCE over VReg_64: `InstLo` on the sources fills sub0 and
// `InstHi` on the same sources fills sub1.
// NOTE(review): InstLo/InstHi are presumably the low-half and high-half
// 24-bit multiply instructions -- confirm against the instantiations.
class mul24_64_Pat<SDPatternOperator Op, Instruction InstLo, Instruction InstHi> : GCNPat<
(i64 (Op i32:$src0, i32:$src1)),
(REG_SEQUENCE VReg_64,
(InstLo $src0, $src1), sub0,
(InstHi $src0, $src1), sub1)
>;

// Instantiate the 64-bit-result pattern for the i24 and u24 multiply nodes,
// pairing each with its V_MUL_* (sub0) and V_MUL_HI_* (sub1) instruction.
def : mul24_64_Pat<AMDGPUmul_i24, V_MUL_I32_I24_e64, V_MUL_HI_I32_I24_e64>;
def : mul24_64_Pat<AMDGPUmul_u24, V_MUL_U32_U24_e64, V_MUL_HI_U32_U24_e64>;

//===----------------------------------------------------------------------===//
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//
Expand Down

0 comments on commit 8a66510

Please sign in to comment.