[SLP] Remap low mask indices through E1Mask when the shuffle mask is resized.

In BoUpSLP::ShuffleCostEstimator, the loop shown in the first hunk already
rewrote indices >= CommonVF as E1Mask[Idx - CommonVF] + VF. This patch adds
the missing else branch so that indices below CommonVF are rewritten as
E1Mask[Idx] as well, before CommonVF is set to VF.

The new test runs `opt -passes=slp-vectorizer` for x86_64-unknown-linux with
-slp-threshold=-10 and checks the output with FileCheck.

NOTE(review): several CHECK lines below end in a vector type with no shuffle
mask / constant operand after it. That text appears to have been lost before
this patch was captured; regenerate the assertions with
utils/update_test_checks.py instead of editing them by hand.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 66a3c257a76f8..ba804c10127e2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7379,6 +7379,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
             continue;
           if (Idx >= static_cast<int>(CommonVF))
             Idx = E1Mask[Idx - CommonVF] + VF;
+          else
+            Idx = E1Mask[Idx];
         }
         CommonVF = VF;
       }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-resized.ll b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-resized.ll
new file mode 100644
index 0000000000000..7d493dac015a9
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-resized.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -slp-threshold=-10 < %s | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test() {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB3:%.*]] ], [ zeroinitializer, [[BB:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <8 x i32>
+; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> , <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32>
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb4:
+; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]
+; CHECK-NEXT: ret i32 0
+;
+bb:
+  br label %bb1
+
+bb1:
+  %phi = phi i32 [ %or, %bb3 ], [ 0, %bb ]
+  %phi2 = phi i32 [ %add, %bb3 ], [ 0, %bb ]
+  br i1 false, label %bb4, label %bb3
+
+bb3:
+  %or = or i32 0, %phi
+  %add = add i32 0, 0
+  br label %bb1
+
+bb4:
+  %phi5 = phi i32 [ %phi2, %bb1 ]
+  %phi6 = phi i32 [ %phi2, %bb1 ]
+  %phi7 = phi i32 [ %phi2, %bb1 ]
+  %phi8 = phi i32 [ %phi2, %bb1 ]
+  %phi9 = phi i32 [ %phi2, %bb1 ]
+  %phi10 = phi i32 [ %phi2, %bb1 ]
+  %phi11 = phi i32 [ %phi, %bb1 ]
+  %phi12 = phi i32 [ %phi, %bb1 ]
+  ret i32 0
+}