diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 6e46547b15b2b..a7748ec1fdc56 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3900,7 +3900,10 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
   unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
   unsigned MaxVectorSize =
       TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
-  unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
+  // Clamp to at least one element: the unsigned division yields zero whenever
+  // the reported register width is smaller than the element size.
+  unsigned MaxElementsInVector =
+      std::max(1u, MaxVectorSize / ElementSize);
   // When there are multiple shufflevector operations on the same input,
   // especially when the vector length is larger than the register size,
   // identical shuffle patterns may occur across different groups of elements.
diff --git a/llvm/test/Transforms/VectorCombine/fold-select-shuffle.ll b/llvm/test/Transforms/VectorCombine/fold-select-shuffle.ll
new file mode 100644
index 0000000000000..e898689b8f61b
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/fold-select-shuffle.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=vector-combine -S < %s | FileCheck %s
+
+define ptx_kernel void @shuffle_ptx_i64() {
+; CHECK-LABEL: define ptx_kernel void @shuffle_ptx_i64() {
+; CHECK-NEXT:  [[_LR_PH:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> [[TMP3]], <8 x i32>
+; CHECK-NEXT:    ret void
+;
+.lr.ph:
+  %0 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32>
+  %1 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32>
+  %2 = or <8 x i64> %0, %1
+  %3 = shl <8 x i64> %0, %1
+  %4 = shufflevector <8 x i64> %2, <8 x i64> %3, <8 x i32>
+  ret void
+}