diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 5d03b66b0ce33..ebca2d855a467 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -175,7 +175,10 @@ class VPBuilder {
 
   VPValue *createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL = {},
                     const Twine &Name = "") {
-    return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
+    // Build the Or directly so that its disjoint flag is explicitly cleared.
+    return tryInsertInstruction(new VPInstruction(
+        Instruction::BinaryOps::Or, {LHS, RHS},
+        VPRecipeWithIRFlags::DisjointFlagsTy(false), DL, Name));
   }
 
   VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 3baca43f72767..707a826ecdc25 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -913,6 +913,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
     WrapFlagsTy(bool HasNUW, bool HasNSW) : HasNUW(HasNUW), HasNSW(HasNSW) {}
   };
 
+  /// Disjoint flag stored by recipes of OperationType::DisjointOp.
+  struct DisjointFlagsTy {
+    char IsDisjoint : 1;
+    DisjointFlagsTy(bool IsDisjoint) : IsDisjoint(IsDisjoint) {}
+  };
+
 protected:
   struct GEPFlagsTy {
     char IsInBounds : 1;
@@ -920,9 +926,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
   };
 
 private:
-  struct DisjointFlagsTy {
-    char IsDisjoint : 1;
-  };
   struct ExactFlagsTy {
     char IsExact : 1;
   };
@@ -1016,6 +1019,14 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
       : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
         FMFs(FMFs) {}
 
+  /// Construct a recipe with OpType set to OperationType::DisjointOp that
+  /// stores \p DisjointFlags.
+  template <typename IterT>
+  VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
+                      DisjointFlagsTy DisjointFlags, DebugLoc DL = {})
+      : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
+        DisjointFlags(DisjointFlags) {}
+
 protected:
   template <typename IterT>
   VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
@@ -1221,6 +1232,16 @@ class VPInstruction : public VPRecipeWithIRFlags {
       : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
         Opcode(Opcode), Name(Name.str()) {}
 
+  /// Create a VPInstruction carrying \p DisjointFlag. Asserts that \p Opcode
+  /// is Instruction::Or.
+  VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
+                DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
+                const Twine &Name = "")
+      : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
+        Opcode(Opcode), Name(Name.str()) {
+    assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
+  }
+
   VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
                 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
 
diff --git a/llvm/test/Transforms/LoopVectorize/pr87378-vpinstruction-or-drop-poison-generating-flags.ll b/llvm/test/Transforms/LoopVectorize/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
new file mode 100644
index 0000000000000..4e38630209b2d
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr87378-vpinstruction-or-drop-poison-generating-flags.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -p loop-vectorize -mattr="+v" -S %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+; Test case for https://github.com/llvm/llvm-project/issues/87378.
+define void @pr87378_vpinstruction_or_drop_poison_generating_flags(ptr %arg, i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: define void @pr87378_vpinstruction_or_drop_poison_generating_flags(
+; CHECK-SAME: ptr [[ARG:%.*]], i64 [[A:%.*]], i64 [[B:%.*]], i64 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1001, [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 8
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1001, [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1001, [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 8
+; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 8 x i64> [[TMP6]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <vscale x 8 x i64> [[TMP7]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 8
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 1, [[TMP10]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP11]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[A]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[B]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[C]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT3]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT:    [[TMP15:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP14]], <vscale x 8 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <vscale x 8 x i1> [[TMP13]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP17:%.*]] = or <vscale x 8 x i1> [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ule <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
+; CHECK-NEXT:    [[TMP19:%.*]] = select <vscale x 8 x i1> [[TMP17]], <vscale x 8 x i1> [[TMP18]], <vscale x 8 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP20:%.*]] = xor <vscale x 8 x i1> [[TMP14]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP21:%.*]] = select <vscale x 8 x i1> [[TMP13]], <vscale x 8 x i1> [[TMP20]], <vscale x 8 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP22:%.*]] = or <vscale x 8 x i1> [[TMP19]], [[TMP21]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP19]], <vscale x 8 x i64> [[BROADCAST_SPLAT6]], <vscale x 8 x i64> shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 poison, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <vscale x 8 x i64> [[PREDPHI]], i32 0
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr i16, ptr [[TMP24]], i32 0
+; CHECK-NEXT:    call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> zeroinitializer, ptr [[TMP25]], i32 2, <vscale x 8 x i1> [[TMP22]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1001, [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[C_1:%.*]] = icmp ule i64 [[IV]], [[A]]
+; CHECK-NEXT:    br i1 [[C_1]], label [[THEN_1:%.*]], label [[ELSE_1:%.*]]
+; CHECK:       then.1:
+; CHECK-NEXT:    [[C_2:%.*]] = icmp ule i64 [[IV]], [[B]]
+; CHECK-NEXT:    br i1 [[C_2]], label [[ELSE_1]], label [[MERGE:%.*]]
+; CHECK:       else.1:
+; CHECK-NEXT:    [[C_3:%.*]] = icmp ule i64 [[IV]], [[C]]
+; CHECK-NEXT:    br i1 [[C_3]], label [[THEN_2:%.*]], label [[LOOP_LATCH]]
+; CHECK:       then.2:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ poison, [[THEN_1]] ], [ [[IV]], [[THEN_2]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i16, ptr [[ARG]], i64 [[IDX]]
+; CHECK-NEXT:    store i16 0, ptr [[GETELEMENTPTR]], align 2
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq i64 [[IV]], 1000
+; CHECK-NEXT:    br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %c.1 = icmp ule i64 %iv, %a
+  br i1 %c.1, label %then.1, label %else.1
+
+then.1:
+  %c.2 = icmp ule i64 %iv, %b
+  br i1 %c.2, label %else.1, label %merge
+
+else.1:
+  %c.3 = icmp ule i64 %iv, %c
+  br i1 %c.3, label %then.2, label %loop.latch
+
+then.2:
+  br label %merge
+
+merge:
+  %idx = phi i64 [ poison, %then.1 ], [ %iv, %then.2 ]
+  %getelementptr = getelementptr i16, ptr %arg, i64 %idx
+  store i16 0, ptr %getelementptr, align 2
+  br label %loop.latch
+
+loop.latch:
+  %iv.next = add i64 %iv, 1
+  %icmp = icmp eq i64 %iv, 1000
+  br i1 %icmp, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.