From 129ae515fba022353050e0f313b32595de9e4b39 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 28 Jun 2021 12:21:19 -0700 Subject: [PATCH] [INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to reduction(V). After SLP + LTO we may have reduction(shuffle V, poison, mask). This can be simplified to just reduction(V) if the mask is only for a single vector and just all elements from this vector are permuted, without reusing, replacing with undefs and/or other values, etc. Differential Revision: https://reviews.llvm.org/D105053 --- .../InstCombine/InstCombineCalls.cpp | 41 +++++++++++++++++++ .../InstCombine/reduction-shufflevector.ll | 35 ++++++---------- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index fb3dfd89895be..552de8b072e39 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" @@ -1983,6 +1984,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { replaceInstUsesWith(CI, Res); return eraseInstFromFunction(CI); } + LLVM_FALLTHROUGH; + } + case Intrinsic::vector_reduce_add: + case Intrinsic::vector_reduce_mul: + case Intrinsic::vector_reduce_xor: + case Intrinsic::vector_reduce_umax: + case Intrinsic::vector_reduce_umin: + case Intrinsic::vector_reduce_smax: + case Intrinsic::vector_reduce_smin: + case Intrinsic::vector_reduce_fmax: + case Intrinsic::vector_reduce_fmin: + case Intrinsic::vector_reduce_fadd: + case Intrinsic::vector_reduce_fmul: { + bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd && + IID != Intrinsic::vector_reduce_fmul) || + II->hasAllowReassoc(); + const 
unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd || + IID == Intrinsic::vector_reduce_fmul) + ? 1 + : 0; + Value *Arg = II->getArgOperand(ArgIdx); + Value *V; + ArrayRef Mask; + if (!isa(Arg->getType()) || !CanBeReassociated || + !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) || + !cast(Arg)->isSingleSource()) + break; + int Sz = Mask.size(); + SmallBitVector UsedIndices(Sz); + for (int Idx : Mask) { + if (Idx == UndefMaskElem || UsedIndices.test(Idx)) + break; + UsedIndices.set(Idx); + } + // Can remove shuffle iff just shuffled elements, no repeats, undefs, or + // other changes. + if (UsedIndices.all()) { + replaceUse(II->getOperandUse(ArgIdx), V); + return nullptr; + } break; } default: { diff --git a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll index 1ecdb386ac1ab..cf43f1bd626de 100644 --- a/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll +++ b/llvm/test/Transforms/InstCombine/reduction-shufflevector.ll @@ -13,8 +13,7 @@ define i32 @reduce_add(<4 x i32> %x) { define i32 @reduce_or(<4 x i32> %x) { ; CHECK-LABEL: @reduce_or( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> @@ -24,8 +23,7 @@ define i32 @reduce_or(<4 x i32> %x) { define i32 @reduce_and(<4 x i32> %x) { ; CHECK-LABEL: @reduce_and( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 
x i32> @@ -35,8 +33,7 @@ define i32 @reduce_and(<4 x i32> %x) { define i32 @reduce_xor(<4 x i32> %x) { ; CHECK-LABEL: @reduce_xor( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> @@ -46,8 +43,7 @@ define i32 @reduce_xor(<4 x i32> %x) { define i32 @reduce_umax(<4 x i32> %x) { ; CHECK-LABEL: @reduce_umax( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -57,8 +53,7 @@ define i32 @reduce_umax(<4 x i32> %x) { define i32 @reduce_umin(<4 x i32> %x) { ; CHECK-LABEL: @reduce_umin( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -68,8 +63,7 @@ define i32 @reduce_umin(<4 x i32> %x) { define i32 @reduce_smax(<4 x i32> %x) { ; CHECK-LABEL: @reduce_smax( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -79,8 +73,7 @@ 
define i32 @reduce_smax(<4 x i32> %x) { define i32 @reduce_smin(<4 x i32> %x) { ; CHECK-LABEL: @reduce_smin( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]]) ; CHECK-NEXT: ret i32 [[RES]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -90,19 +83,17 @@ define i32 @reduce_smin(<4 x i32> %x) { define float @reduce_fmax(<4 x float> %x) { ; CHECK-LABEL: @reduce_fmax( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> - %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf) + %res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf) ret float %res } define float @reduce_fmin(<4 x float> %x) { ; CHECK-LABEL: @reduce_fmin( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> @@ -112,8 +103,7 @@ define float @reduce_fmin(<4 x float> %x) { define float @reduce_fadd(float %a, <4 x float> %x) { ; CHECK-LABEL: @reduce_fadd( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; 
CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> @@ -123,8 +113,7 @@ define float @reduce_fadd(float %a, <4 x float> %x) { define float @reduce_fmul(float %a, <4 x float> %x) { ; CHECK-LABEL: @reduce_fmul( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]]) +; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]]) ; CHECK-NEXT: ret float [[RES]] ; %shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32>