Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1430,6 +1430,7 @@ class TargetTransformInfo {

/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
LLVM_ABI static OperandValueInfo getOperandInfo(const Value *V);
/// Combine the operand info of two values, \p X and \p Y, into a single
/// OperandValueInfo (the definition calls getOperandInfo on each of them).
LLVM_ABI static OperandValueInfo mergeInfo(const Value *X, const Value *Y);

/// This is an approximation of reciprocal throughput of a math/logic op.
/// A higher cost indicates less expected throughput.
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,27 @@ TargetTransformInfo::getOperandInfo(const Value *V) {
return {OpInfo, OpProps};
}

/// Combine the operand info of two values into a single OperandValueInfo.
/// Queries getOperandInfo() for \p X and \p Y, then derives one merged operand
/// kind and one merged property set from the two results.
/// \returns {MergeInfo, MergeProp} as computed by the branches below.
TargetTransformInfo::OperandValueInfo
TargetTransformInfo::mergeInfo(const Value *X, const Value *Y) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we make this take OperandValueInfo inputs instead of Value? Or at least make an additional variant that the Value args version wraps around.

auto [OpInfoX, OpPropsX] = TargetTransformInfo::getOperandInfo(X);
auto [OpInfoY, OpPropsY] = TargetTransformInfo::getOperandInfo(Y);

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If X == Y we can just return OpInfoX/OpPropsX immediately (or we just handle this in foldShuffleOfBinops)?

// Defaults; MergeInfo is reassigned by every branch of the chain below.
OperandValueKind MergeInfo = OK_AnyValue;
OperandValueProperties MergeProp = OP_None;

// If either side is OK_AnyValue or OK_UniformValue, the merged kind is
// OK_AnyValue.
// NOTE(review): this branch is also taken when X == Y, so a value that is
// OK_UniformValue on its own is reported as OK_AnyValue after merging.
if (OpInfoX == OK_AnyValue || OpInfoY == OK_AnyValue ||
OpInfoX == OK_UniformValue || OpInfoY == OK_UniformValue)
MergeInfo = OK_AnyValue;
// Otherwise, a non-uniform constant on either side makes the merged kind
// OK_NonUniformConstantValue.
else if (OpInfoX == OK_NonUniformConstantValue ||
OpInfoY == OK_NonUniformConstantValue)
MergeInfo = OK_NonUniformConstantValue;
// Remaining case (presumably both sides are OK_UniformConstantValue; confirm
// against the OperandValueKind enum): the merged kind stays uniform only when
// X and Y are the very same Value.
else
MergeInfo = X == Y ? OK_UniformConstantValue : OK_NonUniformConstantValue;

// Keep the properties only when both sides report identical ones.
MergeProp = OpPropsX == OpPropsY ? OpPropsX : OP_None;
return {MergeInfo, MergeProp};
}

InstructionCost TargetTransformInfo::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueInfo Op1Info, OperandValueInfo Op2Info,
Expand Down
13 changes: 9 additions & 4 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2437,6 +2437,10 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
M -= NumSrcElts;
};

TTI::OperandValueInfo Op0Info, Op1Info;
Op0Info = TTI.mergeInfo(X, Z);
Op1Info = TTI.mergeInfo(Y, W);

SmallVector<int> NewMask0(OldMask);
TargetTransformInfo::ShuffleKind SK0 = TargetTransformInfo::SK_PermuteTwoSrc;
if (X == Z) {
Expand Down Expand Up @@ -2500,11 +2504,12 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
nullptr, {Y, W});

if (PredLHS == CmpInst::BAD_ICMP_PREDICATE) {
NewCost +=
TTI.getArithmeticInstrCost(LHS->getOpcode(), ShuffleDstTy, CostKind);
NewCost += TTI.getArithmeticInstrCost(LHS->getOpcode(), ShuffleDstTy,
CostKind, Op0Info, Op1Info);
} else {
NewCost += TTI.getCmpSelInstrCost(LHS->getOpcode(), ShuffleCmpTy,
ShuffleDstTy, PredLHS, CostKind);
NewCost +=
TTI.getCmpSelInstrCost(LHS->getOpcode(), ShuffleCmpTy, ShuffleDstTy,
PredLHS, CostKind, Op0Info, Op1Info);
}

LLVM_DEBUG(dbgs() << "Found a shuffle feeding two binops: " << I
Expand Down
13 changes: 13 additions & 0 deletions llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,19 @@ define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x floa
ret <4 x float> %r
}

; Two shl-by-splat(7) ops on <8 x i16> inputs are concatenated by a shuffle;
; the CHECK lines expect the shuffle to be applied to the inputs first,
; followed by a single <16 x i16> shl by splat(7).
define <16 x i16> @shuf_uniform_shift_v16i16_v8i16(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: define <16 x i16> @shuf_uniform_shift_v16i16_v8i16(
; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: [[RES:%.*]] = shl <16 x i16> [[TMP1]], splat (i16 7)
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
%v0 = shl <8 x i16> %a0, splat (i16 7)
%v1 = shl <8 x i16> %a1, splat (i16 7)
%res = shufflevector <8 x i16> %v0, <8 x i16> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i16> %res
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need test coverage for all constant cases, pow2 cases, etc.

See X86TTIImpl::getArithmeticInstrCost for examples of how various operand info modes get used - multiplies are probably the easiest to work with.


; Common operand is op0 of the binops.

define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
Expand Down