diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index b93bdf244237e..53291a931530f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -721,6 +721,10 @@ class VPIRFlags { AllFlags = Other.AllFlags; } + /// Only keep flags also present in \p Other. \p Other must have the same + /// OpType as the current object. + void intersectFlags(const VPIRFlags &Other); + /// Drop all poison-generating flags. void dropPoisonGeneratingFlags() { // NOTE: This needs to be kept in-sync with diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 46162a9276469..bf51489543098 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -392,6 +392,42 @@ void VPPartialReductionRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +void VPIRFlags::intersectFlags(const VPIRFlags &Other) { + assert(OpType == Other.OpType && "OpType must match"); + switch (OpType) { + case OperationType::OverflowingBinOp: + WrapFlags.HasNUW &= Other.WrapFlags.HasNUW; + WrapFlags.HasNSW &= Other.WrapFlags.HasNSW; + break; + case OperationType::Trunc: + TruncFlags.HasNUW &= Other.TruncFlags.HasNUW; + TruncFlags.HasNSW &= Other.TruncFlags.HasNSW; + break; + case OperationType::DisjointOp: + DisjointFlags.IsDisjoint &= Other.DisjointFlags.IsDisjoint; + break; + case OperationType::PossiblyExactOp: + ExactFlags.IsExact &= Other.ExactFlags.IsExact; + break; + case OperationType::GEPOp: + GEPFlags &= Other.GEPFlags; + break; + case OperationType::FPMathOp: + FMFs.NoNaNs &= Other.FMFs.NoNaNs; + FMFs.NoInfs &= Other.FMFs.NoInfs; + break; + case OperationType::NonNegOp: + NonNegFlags.NonNeg &= Other.NonNegFlags.NonNeg; + break; + case OperationType::Cmp: + assert(CmpPredicate == Other.CmpPredicate && "Cannot drop CmpPredicate"); + break; + case OperationType::Other: + assert(AllFlags == Other.AllFlags && "Cannot drop other flags"); + break; + } +} + FastMathFlags VPIRFlags::getFastMathFlags() const { assert(OpType == OperationType::FPMathOp && "recipe doesn't have fast math flags"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8e2fe95dd124b..24a1596f0d5c2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -2043,9 +2043,9 @@ void VPlanTransforms::cse(VPlan &Plan) { // V must dominate Def for a valid replacement. if (!VPDT.dominates(V->getParent(), VPBB)) continue; - // Drop poison-generating flags when reusing a value. + // Only keep flags present on both V and Def. 
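+        // E.g., when a 'lshr exact' is CSE'd with an otherwise-identical
+        // plain 'lshr', the reused value must be a plain 'lshr'.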
         if (auto *RFlags = dyn_cast<VPIRFlags>(V))
-          RFlags->dropPoisonGeneratingFlags();
+          RFlags->intersectFlags(*cast<VPIRFlags>(Def));
         Def->replaceAllUsesWith(V);
         continue;
       }
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
index 36c3a2a612d82..db1f2c71e0f77 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
@@ -16,7 +16,7 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]])
 ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index df54411f7e710..c2dfce0aa70b8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -142,8 +142,8 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]]
 ; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0
 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]])
-; CHECK-NEXT: [[TMP49:%.*]] = or <16 x i64> [[VEC_IND37]], splat (i64 1)
-; CHECK-NEXT: [[TMP36:%.*]] = add <16 x i64> [[TMP30]], [[TMP49]]
+; CHECK-NEXT: [[TMP49:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1)
+; CHECK-NEXT: [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP49]]
 ; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP36]], i64 0
 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP37]], i32 8, <16 x i1> [[TMP34]])
 ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[BROADCAST_SPLAT]])
@@ -191,8 +191,8 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]]
 ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0
 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]])
-; CHECK-NEXT: [[TMP54:%.*]] = or <8 x i64> [[VEC_IND70]], splat (i64 1)
-; CHECK-NEXT: [[TMP50:%.*]] = add <8 x i64> [[TMP44]], [[TMP54]]
+; CHECK-NEXT: [[TMP54:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1)
+; CHECK-NEXT: [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP54]]
 ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP50]], i64 0
 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP51]], i32 8, <8 x i1> [[TMP48]])
 ; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[BROADCAST_SPLAT73]])
diff --git 
a/llvm/test/Transforms/LoopVectorize/flags.ll b/llvm/test/Transforms/LoopVectorize/flags.ll index cef8ea656afaa..7a4bfa9228925 100644 --- a/llvm/test/Transforms/LoopVectorize/flags.ll +++ b/llvm/test/Transforms/LoopVectorize/flags.ll @@ -175,7 +175,7 @@ define void @gep_with_shared_nusw_and_others(i64 %n, ptr %A) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw float, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: store <4 x float> [[WIDE_LOAD]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -215,3 +215,471 @@ loop: exit: ret void } + +define void @exact_on_first_shift(ptr noalias %A, ptr noalias %B) { +; CHECK-LABEL: define void @exact_on_first_shift( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[WIDE_LOAD]], splat (i32 10) +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[LSHR_1:%.*]] = lshr exact i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[LSHR_2:%.*]] = lshr i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %lshr.1 = lshr exact i32 %l.A, 10 + store i32 %lshr.1, ptr %gep.A, align 4 + %gep.B = getelementptr i32, ptr %B, i64 %iv + %lshr.2 = lshr i32 %l.A, 10 + store i32 %lshr.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define 
void @exact_on_second_shift(ptr noalias %A, ptr noalias %B) { +; CHECK-LABEL: define void @exact_on_second_shift( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[WIDE_LOAD]], splat (i32 10) +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[TMP3]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[LSHR_1:%.*]] = lshr i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[LSHR_2:%.*]] = lshr exact i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %lshr.1 = lshr i32 %l.A, 10 + store i32 %lshr.1, ptr %gep.A, align 4 + %gep.B = getelementptr i32, ptr %B, i64 %iv + %lshr.2 = lshr exact i32 %l.A, 10 + store i32 %lshr.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @exact_on_both_shifts(ptr noalias %A, ptr noalias %B) { +; CHECK-LABEL: define void @exact_on_both_shifts( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <4 x i32> [[WIDE_LOAD]], splat (i32 10) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr 
[[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[LSHR_1:%.*]] = lshr exact i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[LSHR_1]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[LSHR_2:%.*]] = lshr exact i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[LSHR_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %lshr.1 = lshr exact i32 %l.A, 10 + store i32 %lshr.1, ptr %gep.A, align 4 + %gep.B = getelementptr i32, ptr %B, i64 %iv + %lshr.2 = lshr exact i32 %l.A, 10 + store i32 %lshr.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @disjoint_on_first_or(ptr noalias %A, ptr noalias %B) { +; CHECK-LABEL: define void @disjoint_on_first_or( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 10) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[GEP_B:%.*]] = 
getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[OR_2:%.*]] = or i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %or.1 = or disjoint i32 %l.A, 10 + store i32 %or.1, ptr %gep.A, align 4 + %gep.B = getelementptr i32, ptr %B, i64 %iv + %or.2 = or i32 %l.A, 10 + store i32 %or.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @disjoint_on_second_or(ptr noalias %A, ptr noalias %B) { +; CHECK-LABEL: define void @disjoint_on_second_or( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 10) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[OR_1:%.*]] = or i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[OR_2:%.*]] = or disjoint i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %or.1 = or i32 %l.A, 10 + store i32 %or.1, ptr %gep.A, align 4 + %gep.B = getelementptr i32, ptr %B, i64 %iv + %or.2 = or disjoint i32 %l.A, 10 + store i32 %or.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @disjoint_on_both_or(ptr noalias %A, ptr 
noalias %B) { +; CHECK-LABEL: define void @disjoint_on_both_or( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], splat (i32 10) +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[OR_1:%.*]] = or disjoint i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[OR_1]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i32, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[OR_2:%.*]] = or disjoint i32 [[L_A]], 10 +; CHECK-NEXT: store i32 [[OR_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %or.1 = or disjoint i32 %l.A, 10 + store i32 %or.1, ptr %gep.A, align 4 + %gep.B = getelementptr i32, ptr %B, i64 %iv + %or.2 = or disjoint i32 %l.A, 10 + store i32 %or.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @trunc_flags_no_common(ptr noalias %A, ptr noalias %B, ptr noalias %C) { +; CHECK-LABEL: define void @trunc_flags_no_common( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[WIDE_LOAD]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4 
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[TRUNC_1:%.*]] = trunc nsw i32 [[L_A]] to i16 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i16, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: store i16 [[TRUNC_1]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[TRUNC_2:%.*]] = trunc nuw i32 [[L_A]] to i16 +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i16 [[TRUNC_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %trunc.1 = trunc nsw i32 %l.A to i16 + %gep.B = getelementptr i16, ptr %B, i64 %iv + store i16 %trunc.1, ptr %gep.B, align 4 + %trunc.2 = trunc nuw i32 %l.A to i16 + %gep.C = getelementptr i16, ptr %C, i64 %iv + store i16 %trunc.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @trunc_flags_common(ptr noalias %A, ptr noalias %B, ptr noalias %C) { +; CHECK-LABEL: define void @trunc_flags_common( +; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw <4 x i32> [[WIDE_LOAD]] to <4 x i16> +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 128, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[L_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; 
CHECK-NEXT: [[TRUNC_1:%.*]] = trunc nuw i32 [[L_A]] to i16 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i16, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: store i16 [[TRUNC_1]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[TRUNC_2:%.*]] = trunc nuw nsw i32 [[L_A]] to i16 +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr i16, ptr [[C]], i64 [[IV]] +; CHECK-NEXT: store i16 [[TRUNC_2]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 128 +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.A = getelementptr i32, ptr %A, i64 %iv + %l.A = load i32, ptr %gep.A, align 4 + %trunc.1 = trunc nuw i32 %l.A to i16 + %gep.B = getelementptr i16, ptr %B, i64 %iv + store i16 %trunc.1, ptr %gep.B, align 4 + %trunc.2 = trunc nuw nsw i32 %l.A to i16 + %gep.C = getelementptr i16, ptr %C, i64 %iv + store i16 %trunc.2, ptr %gep.B, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 128 + br i1 %ec, label %exit, label %loop + +exit: + ret void +}