Skip to content

Commit

Permalink
[INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to …
Browse files Browse the repository at this point in the history
…reduction(V).

After SLP + LTO we may have reduction(shuffle V, poison,
mask). This can be simplified to just reduction(V) if the mask selects
from a single vector and merely permutes all of that vector's elements,
without reusing any element or replacing any with undef or other values.

Differential Revision: https://reviews.llvm.org/D105053
  • Loading branch information
alexey-bataev committed Jun 29, 2021
1 parent 5d933c0 commit 129ae51
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 23 deletions.
41 changes: 41 additions & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
Expand Down Expand Up @@ -1983,6 +1984,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
replaceInstUsesWith(CI, Res);
return eraseInstFromFunction(CI);
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul: {
bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
IID != Intrinsic::vector_reduce_fmul) ||
II->hasAllowReassoc();
const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
IID == Intrinsic::vector_reduce_fmul)
? 1
: 0;
Value *Arg = II->getArgOperand(ArgIdx);
Value *V;
ArrayRef<int> Mask;
if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
!match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
!cast<ShuffleVectorInst>(Arg)->isSingleSource())
break;
int Sz = Mask.size();
SmallBitVector UsedIndices(Sz);
for (int Idx : Mask) {
if (Idx == UndefMaskElem || UsedIndices.test(Idx))
break;
UsedIndices.set(Idx);
}
// Can remove shuffle iff just shuffled elements, no repeats, undefs, or
// other changes.
if (UsedIndices.all()) {
replaceUse(II->getOperandUse(ArgIdx), V);
return nullptr;
}
break;
}
default: {
Expand Down
35 changes: 12 additions & 23 deletions llvm/test/Transforms/InstCombine/reduction-shufflevector.ll
Expand Up @@ -13,8 +13,7 @@ define i32 @reduce_add(<4 x i32> %x) {

define i32 @reduce_or(<4 x i32> %x) {
; CHECK-LABEL: @reduce_or(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
Expand All @@ -24,8 +23,7 @@ define i32 @reduce_or(<4 x i32> %x) {

define i32 @reduce_and(<4 x i32> %x) {
; CHECK-LABEL: @reduce_and(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
Expand All @@ -35,8 +33,7 @@ define i32 @reduce_and(<4 x i32> %x) {

define i32 @reduce_xor(<4 x i32> %x) {
; CHECK-LABEL: @reduce_xor(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
Expand All @@ -46,8 +43,7 @@ define i32 @reduce_xor(<4 x i32> %x) {

define i32 @reduce_umax(<4 x i32> %x) {
; CHECK-LABEL: @reduce_umax(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
Expand All @@ -57,8 +53,7 @@ define i32 @reduce_umax(<4 x i32> %x) {

define i32 @reduce_umin(<4 x i32> %x) {
; CHECK-LABEL: @reduce_umin(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
Expand All @@ -68,8 +63,7 @@ define i32 @reduce_umin(<4 x i32> %x) {

define i32 @reduce_smax(<4 x i32> %x) {
; CHECK-LABEL: @reduce_smax(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
Expand All @@ -79,8 +73,7 @@ define i32 @reduce_smax(<4 x i32> %x) {

define i32 @reduce_smin(<4 x i32> %x) {
; CHECK-LABEL: @reduce_smin(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
; CHECK-NEXT: ret i32 [[RES]]
;
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
Expand All @@ -90,19 +83,17 @@ define i32 @reduce_smin(<4 x i32> %x) {

define float @reduce_fmax(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmax(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
%res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
%res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
ret float %res
}

define float @reduce_fmin(<4 x float> %x) {
; CHECK-LABEL: @reduce_fmin(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
Expand All @@ -112,8 +103,7 @@ define float @reduce_fmin(<4 x float> %x) {

define float @reduce_fadd(float %a, <4 x float> %x) {
; CHECK-LABEL: @reduce_fadd(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
Expand All @@ -123,8 +113,7 @@ define float @reduce_fadd(float %a, <4 x float> %x) {

define float @reduce_fmul(float %a, <4 x float> %x) {
; CHECK-LABEL: @reduce_fmul(
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
; CHECK-NEXT: ret float [[RES]]
;
%shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
Expand Down

0 comments on commit 129ae51

Please sign in to comment.