Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release/18.x: [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis #86731

Merged
merged 1 commit into from
Apr 15, 2024

Conversation

AtariDreams
Copy link
Contributor

Need to support proper type conversion for function arguments to avoid compiler crash.

@llvmbot
Copy link
Collaborator

llvmbot commented Mar 26, 2024

@llvm/pr-subscribers-llvm-transforms

Author: AtariDreams (AtariDreams)

Changes

Need to support proper type conversion for function arguments to avoid compiler crash.


Full diff: https://github.com/llvm/llvm-project/pull/86731.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+20-1)
  • (added) llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll (+82)
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 0a9e2c7f49f55f..1fbd69e38eaeec 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -11653,12 +11653,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
       if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
         TysForDecl.push_back(
             FixedVectorType::get(CI->getType(), E->Scalars.size()));
+      auto *CEI = cast<CallInst>(VL0);
       for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
         ValueList OpVL;
         // Some intrinsics have scalar arguments. This argument should not be
         // vectorized.
         if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) {
-          CallInst *CEI = cast<CallInst>(VL0);
           ScalarArg = CEI->getArgOperand(I);
           OpVecs.push_back(CEI->getArgOperand(I));
           if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
@@ -11671,6 +11671,25 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
           LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
           return E->VectorizedValue;
         }
+        auto GetOperandSignedness = [&](unsigned Idx) {
+          const TreeEntry *OpE = getOperandEntry(E, Idx);
+          bool IsSigned = false;
+          auto It = MinBWs.find(OpE);
+          if (It != MinBWs.end())
+            IsSigned = It->second.second;
+          else
+            IsSigned = any_of(OpE->Scalars, [&](Value *R) {
+              return !isKnownNonNegative(R, SimplifyQuery(*DL));
+            });
+          return IsSigned;
+        };
+        ScalarArg = CEI->getArgOperand(I);
+        if (cast<VectorType>(OpVec->getType())->getElementType() !=
+            ScalarArg->getType()) {
+          auto *CastTy = FixedVectorType::get(ScalarArg->getType(),
+                                              VecTy->getNumElements());
+          OpVec = Builder.CreateIntCast(OpVec, CastTy, GetOperandSignedness(I));
+        }
         LLVM_DEBUG(dbgs() << "SLP: OpVec[" << I << "]: " << *OpVec << "\n");
         OpVecs.push_back(OpVec);
         if (UseIntrinsic && isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll
new file mode 100644
index 00000000000000..27c9655f94d3c5
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call-arg-reduced-by-minbitwidth.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-pc-windows-msvc19.34.0 < %s | FileCheck %s
+
+define void @test(ptr %0, i8 %1, i1 %cmp12.i) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[TMP0:%.*]], i8 [[TMP1:%.*]], i1 [[CMP12_I:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i1> poison, i1 [[CMP12_I]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[PRE:%.*]]
+; CHECK:       pre:
+; CHECK-NEXT:    [[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i32>
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i32> @llvm.umax.v8i32(<8 x i32> [[TMP6]], <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i8>
+; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i8> [[TMP8]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]]
+; CHECK-NEXT:    store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1
+; CHECK-NEXT:    br label [[PRE]]
+;
+entry:
+  %idx11 = getelementptr i8, ptr %0, i64 1
+  %idx22 = getelementptr i8, ptr %0, i64 2
+  %idx33 = getelementptr i8, ptr %0, i64 3
+  %idx44 = getelementptr i8, ptr %0, i64 4
+  %idx55 = getelementptr i8, ptr %0, i64 5
+  %idx66 = getelementptr i8, ptr %0, i64 6
+  %idx77 = getelementptr i8, ptr %0, i64 7
+  br label %pre
+
+pre:
+  %conv.i = zext i8 %1 to i32
+  %2 = tail call i32 @llvm.umax.i32(i32 %conv.i, i32 1)
+  %.sroa.speculated.i = add i32 %2, 1
+  %intensity.0.i = select i1 %cmp12.i, i32 %.sroa.speculated.i, i32 %conv.i
+  %conv14.i = trunc i32 %intensity.0.i to i8
+  store i8 %conv14.i, ptr %0, align 1
+  %conv.i.1 = zext i8 %1 to i32
+  %3 = tail call i32 @llvm.umax.i32(i32 %conv.i.1, i32 1)
+  %ss1 = add i32 %3, 1
+  %ii1 = select i1 %cmp12.i, i32 %ss1, i32 %conv.i.1
+  %conv14.i.1 = trunc i32 %ii1 to i8
+  store i8 %conv14.i.1, ptr %idx11, align 1
+  %conv.i.2 = zext i8 %1 to i32
+  %4 = tail call i32 @llvm.umax.i32(i32 %conv.i.2, i32 1)
+  %ss2 = add i32 %4, 1
+  %ii2 = select i1 %cmp12.i, i32 %ss2, i32 %conv.i.2
+  %conv14.i.2 = trunc i32 %ii2 to i8
+  store i8 %conv14.i.2, ptr %idx22, align 1
+  %conv.i.3 = zext i8 %1 to i32
+  %5 = tail call i32 @llvm.umax.i32(i32 %conv.i.3, i32 1)
+  %ss3 = add i32 %5, 1
+  %ii3 = select i1 %cmp12.i, i32 %ss3, i32 %conv.i.3
+  %conv14.i.3 = trunc i32 %ii3 to i8
+  store i8 %conv14.i.3, ptr %idx33, align 1
+  %conv.i.4 = zext i8 %1 to i32
+  %6 = tail call i32 @llvm.umax.i32(i32 %conv.i.4, i32 1)
+  %ss4 = add i32 %6, 1
+  %ii4 = select i1 %cmp12.i, i32 %ss4, i32 %conv.i.4
+  %conv14.i.4 = trunc i32 %ii4 to i8
+  store i8 %conv14.i.4, ptr %idx44, align 1
+  %conv.i.5 = zext i8 %1 to i32
+  %7 = tail call i32 @llvm.umax.i32(i32 %conv.i.5, i32 1)
+  %ss5 = add i32 %7, 1
+  %ii5 = select i1 %cmp12.i, i32 %ss5, i32 %conv.i.5
+  %conv14.i.5 = trunc i32 %ii5 to i8
+  store i8 %conv14.i.5, ptr %idx55, align 1
+  %conv.i.6 = zext i8 %1 to i32
+  %8 = tail call i32 @llvm.umax.i32(i32 %conv.i.6, i32 1)
+  %ss6 = add i32 %8, 1
+  %ii6 = select i1 %cmp12.i, i32 %ss6, i32 %conv.i.6
+  %conv14.i.6 = trunc i32 %ii6 to i8
+  store i8 %conv14.i.6, ptr %idx66, align 1
+  %conv.i.7 = zext i8 %1 to i32
+  %9 = tail call i32 @llvm.umax.i32(i32 %conv.i.7, i32 1)
+  %ss7 = add i32 %9, 1
+  %ii7 = select i1 %cmp12.i, i32 %ss7, i32 %conv.i.7
+  %conv14.i.7 = trunc i32 %ii7 to i8
+  store i8 %conv14.i.7, ptr %idx77, align 1
+  br label %pre
+}

Copy link
Member

@alexey-bataev alexey-bataev left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

@AtariDreams AtariDreams changed the title release/18.x [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis release/18.x: [SLP]Fix a crash if the argument of call was affected by minbitwidth analysis Apr 2, 2024
@nikic nikic added this to the LLVM 18.X Release milestone Apr 8, 2024
…analysis.

Need to support proper type conversion for function arguments to avoid
compiler crash.
@tstellar tstellar merged commit 6e071cf into llvm:release/18.x Apr 15, 2024
7 of 8 checks passed
@AtariDreams AtariDreams deleted the backport-objc++ branch April 15, 2024 22:44
@tstellar
Copy link
Collaborator

Hi @AtariDreams (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

None yet

5 participants