diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9adaaf3fe66d6..572c4399b8b55 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1183,6 +1183,7 @@ class BoUpSLP { void deleteTree() { VectorizableTree.clear(); ScalarToTreeEntry.clear(); + MultiNodeScalars.clear(); MustGather.clear(); EntryToLastInstruction.clear(); ExternalUses.clear(); @@ -2431,7 +2432,7 @@ class BoUpSLP { return false; }); if (It != VL.end()) { - assert(TE->isSame(VL) && "Expedted same scalars."); + assert(TE->isSame(VL) && "Expected same scalars."); return TE; } return nullptr; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalar-in-multi-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalar-in-multi-node.ll new file mode 100644 index 0000000000000..13015a495a918 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalar-in-multi-node.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +%struct.UFP = type { i32, i32, i32, [4 x i32] } + +define void @test(ptr %u) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[U:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr null, align 4 +; CHECK-NEXT: [[IDX:%.*]] = getelementptr [[STRUCT_UFP:%.*]], ptr [[U]], i64 0, i32 3, i64 3 +; CHECK-NEXT: [[IDX1:%.*]] = getelementptr [[STRUCT_UFP]], ptr [[U]], i64 0, i32 3, i64 2 +; CHECK-NEXT: [[IDX2:%.*]] = load i32, ptr [[IDX]], align 4 +; CHECK-NEXT: [[IDX3:%.*]] = load i32, ptr [[IDX1]], align 4 +; CHECK-NEXT: br label [[WHILE:%.*]] +; CHECK: bb: +; CHECK-NEXT: store i32 [[OR_I_I:%.*]], ptr [[IDX]], align 4 +; CHECK-NEXT: store i32 [[OR19_I_I:%.*]], ptr [[IDX1]], align 4 +; CHECK-NEXT: ret void +; CHECK: while: +; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[OR26_I_I:%.*]], [[WHILE]] ] +; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[IDX3]], [[ENTRY]] ], [ [[OR19_I_I]], [[WHILE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[IDX2]], [[ENTRY]] ], [ 0, [[WHILE]] ] +; CHECK-NEXT: [[OR_I_I]] = tail call i32 @llvm.fshl.i32(i32 [[TMP2]], i32 0, i32 0) +; CHECK-NEXT: [[OR19_I_I]] = tail call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP2]], i32 0) +; CHECK-NEXT: [[OR26_I_I]] = tail call i32 @llvm.fshl.i32(i32 0, i32 [[TMP1]], i32 0) +; CHECK-NEXT: br i1 false, label [[BB:%.*]], label [[WHILE]] +; +entry: + %0 = load i32, ptr null, align 4 + %idx = getelementptr %struct.UFP, ptr %u, i64 0, i32 3, i64 3 + %idx1 = getelementptr %struct.UFP, ptr %u, i64 0, i32 3, i64 2 + %idx2 = load i32, ptr %idx, align 4 + %idx3 = load i32, ptr %idx1, align 4 + br label %while + +bb: + store i32 %or.i.i, ptr %idx, align 4 + store i32 %or19.i.i, ptr %idx1, align 4 + ret void + +while: + %1 = phi i32 [ %0, %entry ], [ %or26.i.i, %while ] + %2 = phi i32 [ %idx3, %entry ], [ %or19.i.i, %while ] + %3 = phi i32 [ %idx2, %entry ], [ 0, %while ] + %or.i.i = tail call i32 @llvm.fshl.i32(i32 %2, i32 0, i32 0) + %or19.i.i = tail call i32 @llvm.fshl.i32(i32 %1, i32 %2, i32 0) + %or26.i.i = tail call i32 @llvm.fshl.i32(i32 0, i32 %1, i32 0) + br i1 false, label %bb, label %while +} + +declare i32 @llvm.fshl.i32(i32, i32, i32)