[SLP] Keep the constant on the right when converting to a non-commutative opcode

When an instruction with a constant operand is rewritten to a different
target opcode, the operand order matters whenever the target opcode is
non-commutative (e.g. shl, sub). The old check swapped the operands only
for Add/Or/Xor being converted to Sub; the new check swaps them for every
non-commutative target opcode, so the variable ends up on the left and the
constant on the right.

The new test groups three `shl` instructions with `or i16 0, %l3` (constant
on the left) and expects a single `shl <4 x i16>` whose last lane shifts
the loaded value by 0.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 631af61e2cfba..df7885e31741e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1132,13 +1132,11 @@ class BinOpSameOpcodeHelper {
         break;
       }
       Value *LHS = I->getOperand(1 - Pos);
-      // constant + x cannot be -constant - x
-      // instead, it should be x - -constant
-      if (Pos == 1 ||
-          ((FromOpcode == Instruction::Add || FromOpcode == Instruction::Or ||
-            FromOpcode == Instruction::Xor) &&
-           ToOpcode == Instruction::Sub))
+      // If the target opcode is non-commutative (e.g., shl, sub),
+      // force the variable to the left and the constant to the right.
+      if (Pos == 1 || !Instruction::isCommutative(ToOpcode))
         return SmallVector<Value *>({LHS, RHS});
+
       return SmallVector<Value *>({RHS, LHS});
     }
   };
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/lhs-constant-non-cummutative.ll b/llvm/test/Transforms/SLPVectorizer/X86/lhs-constant-non-cummutative.ll
new file mode 100644
index 0000000000000..5a33d5f5bac5b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/lhs-constant-non-cummutative.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-50 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @or_lhs_to_shl(ptr %p, ptr %s) {
+; CHECK-LABEL: define void @or_lhs_to_shl(
+; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i16> [[TMP0]], <i16 1, i16 2, i16 3, i16 0>
+; CHECK-NEXT:    store <4 x i16> [[TMP1]], ptr [[S]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %p1 = getelementptr i16, ptr %p, i64 1
+  %p2 = getelementptr i16, ptr %p, i64 2
+  %p3 = getelementptr i16, ptr %p, i64 3
+
+  %l0 = load i16, ptr %p, align 2
+  %l1 = load i16, ptr %p1, align 2
+  %l2 = load i16, ptr %p2, align 2
+  %l3 = load i16, ptr %p3, align 2
+
+  ; 3 shl instructions force the vectorizer to choose shl as the target
+  %op0 = shl i16 %l0, 1
+  %op1 = shl i16 %l1, 2
+  %op2 = shl i16 %l2, 3
+  %op3 = or i16 0, %l3 ; The buggy instruction
+
+  %s1 = getelementptr i16, ptr %s, i64 1
+  %s2 = getelementptr i16, ptr %s, i64 2
+  %s3 = getelementptr i16, ptr %s, i64 3
+
+  store i16 %op0, ptr %s, align 2
+  store i16 %op1, ptr %s1, align 2
+  store i16 %op2, ptr %s2, align 2
+  store i16 %op3, ptr %s3, align 2
+
+  ret void
+}