[AMDGPU] Use d16 flag for image.sample instructions
The image.sample instruction can be forced to return a half type instead of
float when the d16 flag is enabled.

This patch adds a new pattern in InstCombine that detects whether the output
of image.sample is used only by an fptrunc converting the value from float to
half. If the pattern is detected, the fptrunc and the image.sample are
combined into a single image.sample that returns a half type. Later, during
lowering, the d16 flag is added to the image sample intrinsic.
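
A rough sketch of the IR shape the new combine targets (illustrative only: the
2D sample variant, the dmask of 15, and the value names are assumptions, not
taken from the patch's tests). Before the combine, the sample result is
consumed solely by an fptrunc to half:

  ; the only use of %data is the fptrunc below
  %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  %res = fptrunc <4 x float> %data to <4 x half>

After the combine, both instructions fold into a single sample that returns
half directly, which lowering can then select with the d16 bit set:

  ; d16 form: half return type, the fptrunc is gone
  %res = call <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)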

Differential Revision: https://reviews.llvm.org/D124232
mariusz-sikora-at-amd authored and jayfoad committed Apr 25, 2022
1 parent c712bf3 commit d1762fc
Showing 2 changed files with 455 additions and 17 deletions.
69 changes: 52 additions & 17 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -110,33 +110,40 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
llvm_unreachable("Should never be called!");
}

/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
/// the modified arguments.
/// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
/// modified arguments (based on OldIntr) and replaces InstToReplace with
/// this newly created intrinsic call.
static Optional<Instruction *> modifyIntrinsicCall(
IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
InstCombiner &IC,
std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
Func) {
SmallVector<Type *, 4> ArgTys;
if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
return None;

SmallVector<Value *, 8> Args(II.args());
SmallVector<Value *, 8> Args(OldIntr.args());

// Modify arguments and types
Func(Args, ArgTys);

Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);

CallInst *NewCall = IC.Builder.CreateCall(I, Args);
NewCall->takeName(&II);
NewCall->copyMetadata(II);
NewCall->takeName(&OldIntr);
NewCall->copyMetadata(OldIntr);
if (isa<FPMathOperator>(NewCall))
NewCall->copyFastMathFlags(&II);
NewCall->copyFastMathFlags(&OldIntr);

// Erase and replace uses
if (!II.getType()->isVoidTy())
IC.replaceInstUsesWith(II, NewCall);
return IC.eraseInstFromFunction(II);
if (!InstToReplace.getType()->isVoidTy())
IC.replaceInstUsesWith(InstToReplace, NewCall);

auto RetValue = IC.eraseInstFromFunction(InstToReplace);
if (!OldIntr.isIdenticalTo(&InstToReplace))
IC.eraseInstFromFunction(OldIntr);

return RetValue;
}

static Optional<Instruction *>
@@ -153,7 +160,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
ImageDimIntr->Dim);
return modifyIntrinsicCall(
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->LodIndex);
});
}
@@ -170,7 +177,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
ImageDimIntr->Dim);
return modifyIntrinsicCall(
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->MipIndex);
});
}
@@ -187,7 +194,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
ImageDimIntr->Dim);
return modifyIntrinsicCall(
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
});
@@ -205,13 +212,41 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
AMDGPU::getImageDimIntrinsicByBaseOpcode(
OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
return modifyIntrinsicCall(
II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
});
}
}
}

// Try to use D16
if (ST->hasD16Images()) {

const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);

if (BaseOpcode->HasD16) {

// If the only use of image intrinsic is a fptrunc (with conversion to
// half) then both fptrunc and image intrinsic will be replaced with image
// intrinsic with D16 flag.
if (II.hasOneUse()) {
Instruction *User = II.user_back();

if (User->getOpcode() == Instruction::FPTrunc &&
User->getType()->getScalarType()->isHalfTy()) {

return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
[&](auto &Args, auto &ArgTys) {
// Change return type of image intrinsic.
// Set it to return type of fptrunc.
ArgTys[0] = User->getType();
});
}
}
}
}

// Try to use A16 or G16
if (!ST->hasA16() && !ST->hasG16())
return None;
@@ -263,7 +298,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
: Type::getInt16Ty(II.getContext());

return modifyIntrinsicCall(
II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
if (!OnlyDerivatives) {
ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
