[SLP][NFC] Test showing a cost estimation issue caused by f82eb7e

The buildvector cost for the case shown in the test should be 0 but it is -1, causing the code to get vectorized when it shouldn't. Differential Revision: https://reviews.llvm.org/D148732
llvm · Apr 19, 2023 · a72bcc1 · a72bcc1
1 parent 87cec86
commit a72bcc1
Showing 1 changed file with 44 additions and 0 deletions.
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-float-and-extract-lane1.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-float-and-extract-lane1.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-grtev4-linux-gnu -S -mcpu=corei7-avx -pass-remarks-output=%t < %s | FileCheck %s
+; RUN: FileCheck --input-file=%t --check-prefix=YAML %s
+
+; Building a vector of a float (lane 0) and an extract from lane 1 (lane 1)
+; should have a cost of +1 because we need an extra instruction to combine
+; both into a single vector, so this code should not be vectorized.
+; The remark and IR assertions below capture the current output, which is vectorized.
+
+; YAML: --- !Passed
+; YAML: Pass:            slp-vectorizer
+; YAML: Name:            VectorizedList
+; YAML: Function:        test
+; YAML: Args:
+; YAML:   - String:          'SLP vectorized with cost '
+; YAML:   - Cost:            '-2'
+; YAML:   - String:          ' and with tree size '
+; YAML:   - TreeSize:        '3'
+; YAML: ...
+define void @test(<4 x float> %vec, float %a, float %b, ptr %ptr) {
+; CHECK-LABEL: define void @test
+; CHECK-SAME: (<4 x float> [[VEC:%.*]], float [[A:%.*]], float [[B:%.*]], ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[FADD:%.*]] = fadd float [[A]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> poison, <2 x i32> <i32 undef, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FADD]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> [[TMP3]], <2 x i32> <i32 4, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x float> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1
+; CHECK-NEXT:    [[ROOT:%.*]] = fadd float [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    store float [[ROOT]], ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+  %fadd = fadd float %a, %b                        ; lane 0 source: a scalar float computed here
+  %extr1 = extractelement <4 x float> %vec, i64 1  ; lane 1 source: element 1 of %vec
+; The two scalar fsubs are what the expected output merges into one <2 x float> fsub.
+  %fsub0 = fsub float %fadd, %fadd
+  %fsub1 = fsub float %extr1, %extr1
+; The final fadd combines both lanes into the single value stored to %ptr.
+  %root = fadd float %fsub0, %fsub1
+  store float %root, ptr %ptr
+  ret void
+}