diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7cef98f465715..a0257e760f99a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1432,11 +1432,11 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
           !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
             if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
               // VPWidenStore doesn't have users, and stores are always
-              // profitable to widen: hence, permitting single-scalar stored
-              // values is an important leaf condition. The assert must hold as
-              // we checked the RepOrWidenR operand against
-              // vputils::isSingleScalar.
-              assert(RepOrWidenR == Store->getAddr() ||
+              // profitable to widen: hence, permitting address and mask
+              // operands, and single-scalar stored values is an important leaf
+              // condition. The assert must hold as we checked the RepOrWidenR
+              // operand against vputils::isSingleScalar.
+              assert(RepOrWidenR != Store->getStoredValue() ||
                      vputils::isSingleScalar(Store->getStoredValue()));
               return true;
             }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/X86/narrow-to-single-scalar.ll
new file mode 100644
index 0000000000000..94a05a67a0bdc
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/narrow-to-single-scalar.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -p loop-vectorize -mcpu=skylake -S %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@p = external global [3952 x i8], align 8
+@q = external global [3952 x i8], align 8
+
+define void @narrow_store_user_mask_operand(i32 %x) {
+; CHECK-LABEL: define void @narrow_store_user_mask_operand(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[LOOP_PH:.*]]
+; CHECK:       [[LOOP_PH]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_TAIL:.*]] ]
+; CHECK-NEXT:    [[X_POS:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT:    br i1 [[X_POS]], label %[[LOOP_BODY:.*]], label %[[LOOP_TAIL]]
+; CHECK:       [[LOOP_BODY]]:
+; CHECK-NEXT:    [[LD_P:%.*]] = load double, ptr @p, align 8
+; CHECK-NEXT:    [[GEP_Q_IV:%.*]] = getelementptr double, ptr @q, i64 [[IV]]
+; CHECK-NEXT:    [[GEP_Q_IV_8:%.*]] = getelementptr i8, ptr [[GEP_Q_IV]], i64 -8
+; CHECK-NEXT:    store double [[LD_P]], ptr [[GEP_Q_IV_8]], align 8
+; CHECK-NEXT:    br label %[[LOOP_TAIL]]
+; CHECK:       [[LOOP_TAIL]]:
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_PH]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.ph
+
+loop.ph:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.tail ]
+  %x.pos = icmp sgt i32 %x, 0
+  br i1 %x.pos, label %loop.body, label %loop.tail
+
+loop.body:
+  %ld.p = load double, ptr @p
+  %gep.q.iv = getelementptr double, ptr @q, i64 %iv
+  %gep.q.iv.8 = getelementptr i8, ptr %gep.q.iv, i64 -8
+  store double %ld.p, ptr %gep.q.iv.8
+  br label %loop.tail
+
+loop.tail:
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1
+  br i1 %ec, label %exit, label %loop.ph
+
+exit:
+  ret void
+}
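
For readers not steeped in the VPlan code, here is a minimal standalone C++ sketch of the leaf condition the patch corrects. The `ToyValue` and `ToyWidenStore` types and the `storeUserPermitsNarrowing` helper are illustrative stand-ins, not LLVM's actual VPlan API; only the shape of the assert mirrors the change above.

```cpp
#include <cassert>
#include <cstdio>

// Stand-in for a VPlan value; SingleScalar models what
// vputils::isSingleScalar would report for it.
struct ToyValue {
  bool SingleScalar;
};

// Stand-in for VPWidenStoreRecipe's three operand roles.
struct ToyWidenStore {
  const ToyValue *Addr;
  const ToyValue *StoredValue;
  const ToyValue *Mask; // nullptr when the store is unmasked
};

// Mirrors the fixed leaf condition: a widened-store user never blocks
// narrowing, and only the stored-value role carries the single-scalar
// proviso. Reaching the store through the address or mask operand is
// always acceptable.
bool storeUserPermitsNarrowing(const ToyValue *RepOrWidenR,
                               const ToyWidenStore &Store) {
  assert(RepOrWidenR != Store.StoredValue || Store.StoredValue->SingleScalar);
  return true;
}

int main() {
  ToyValue Mask{true}, Addr{true}, Stored{false};
  ToyWidenStore Store{&Addr, &Stored, &Mask};
  // The old form compared against the address operand only
  // (RepOrWidenR == Store.Addr || isSingleScalar(StoredValue)), so a
  // narrowed recipe reaching the store through the mask operand, as in
  // the new test, tripped the assert spuriously.
  bool OK = storeUserPermitsNarrowing(&Mask, Store);
  std::printf("mask-operand user permitted: %s\n", OK ? "yes" : "no");
  return OK ? 0 : 1;
}
```

The new test appears to exercise exactly that case: after predication, the uniform compare `%x.pos` becomes the mask of the widened store, while the stored value is the load `%ld.p`. The RUN line can be driven through llvm-lit on the new file, e.g. `llvm-lit llvm/test/Transforms/LoopVectorize/X86/narrow-to-single-scalar.ll` from a build tree.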