diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index aed249f5032c8..f79855f7e2c5f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1902,6 +1902,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
 
   void execute(VPTransformState &State) override;
 
+  Type *getSourceElementType() const { return IndexedTy; }
+
   bool onlyFirstLaneUsed(const VPValue *Op) const override {
     assert(is_contained(operands(), Op) &&
            "Op must be an operand of the recipe");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 503140213c116..dcc368933f2a1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1978,7 +1978,7 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
     return TypeSwitch<const VPSingleDefRecipe *,
                       std::optional<std::pair<bool, unsigned>>>(R)
         .Case<VPInstruction, VPWidenRecipe, VPWidenCastRecipe,
-              VPWidenSelectRecipe, VPWidenGEPRecipe>(
+              VPWidenSelectRecipe, VPWidenGEPRecipe, VPReplicateRecipe>(
            [](auto *I) { return std::make_pair(false, I->getOpcode()); })
        .Case<VPWidenIntrinsicRecipe>([](auto *I) {
          return std::make_pair(true, I->getVectorIntrinsicID());
@@ -1986,12 +1986,31 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
        .Default([](auto *) { return std::nullopt; });
   }
 
+  /// If recipe \p R will lower to a GEP with a non-i8 source element type,
+  /// return that source element type.
+  static Type *getGEPSourceElementType(const VPSingleDefRecipe *R) {
+    // All VPInstructions that lower to GEPs must have the i8 source element
+    // type (as they are PtrAdds), so we omit it.
+    return TypeSwitch<const VPSingleDefRecipe *, Type *>(R)
+        .Case<VPReplicateRecipe, VPWidenGEPRecipe>([](auto *I) -> Type * {
+          if (auto *GEP = dyn_cast<GetElementPtrInst>(I->getUnderlyingValue()))
+            return GEP->getSourceElementType();
+          return nullptr;
+        })
+        .Case<VPVectorEndPointerRecipe, VPVectorPointerRecipe>(
+            [](auto *I) { return I->getSourceElementType(); })
+        .Default([](auto *) { return nullptr; });
+  }
+
   /// Returns true if recipe \p Def can be safely handed for CSE.
   static bool canHandle(const VPSingleDefRecipe *Def) {
     // We can extend the list of handled recipes in the future,
     // provided we account for the data embedded in them while checking for
-    // equality or hashing.
-    auto C = getOpcodeOrIntrinsicID(Def);
+    // equality or hashing. We assign VPVectorEndPointerRecipe the GEP opcode,
+    // as it is essentially a GEP with different semantics.
+    auto C = isa<VPVectorEndPointerRecipe>(Def)
+                 ? std::make_pair(false, Instruction::GetElementPtr)
+                 : getOpcodeOrIntrinsicID(Def);
     // The issue with (Insert|Extract)Value is that the index of the
     // insert/extract is not a proper operand in LLVM IR, and hence also not in
@@ -2012,8 +2031,8 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
     VPTypeAnalysis TypeInfo(*Plan);
     hash_code Result = hash_combine(
         Def->getVPDefID(), getOpcodeOrIntrinsicID(Def),
-        TypeInfo.inferScalarType(Def), vputils::isSingleScalar(Def),
-        hash_combine_range(Def->operands()));
+        getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
+        vputils::isSingleScalar(Def), hash_combine_range(Def->operands()));
     if (auto *RFlags = dyn_cast<VPRecipeWithIRFlags>(Def))
       if (RFlags->hasPredicate())
         return hash_combine(Result, RFlags->getPredicate());
@@ -2026,6 +2045,7 @@ struct VPCSEDenseMapInfo : public DenseMapInfo<VPSingleDefRecipe *> {
       return L == R;
     if (L->getVPDefID() != R->getVPDefID() ||
         getOpcodeOrIntrinsicID(L) != getOpcodeOrIntrinsicID(R) ||
+        getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
         vputils::isSingleScalar(L) != vputils::isSingleScalar(R) ||
         !equal(L->operands(), R->operands()))
       return false;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 6f605acd7ecbe..32485c7428908 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -518,11 +518,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
-; CHECK-NEXT:    store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -584,11 +581,8 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1
 ; CHECK-VF8-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
 ; CHECK-VF8-NEXT:    [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-VF8-NEXT:    [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-VF8-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT:    [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
-; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
 ; CHECK-VF8-NEXT:    store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
-; CHECK-VF8-NEXT:    store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
+; CHECK-VF8-NEXT:    store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
 ; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-VF8-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF8-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -656,11 +650,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP15:%.*]] = shl nuw i64 [[TMP14]], 2
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 [[TMP15]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[TMP12]], ptr [[TMP8]], align 4
-; CHECK-NEXT:    store <vscale x 4 x float> [[TMP13]], ptr [[TMP16]], align 4
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP13]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -719,11 +710,8 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia
 ; CHECK-VF8-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP9]], align 4
 ; CHECK-VF8-NEXT:    [[TMP10:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-VF8-NEXT:    [[TMP11:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-VF8-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-VF8-NEXT:    [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 2
-; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 [[TMP13]]
 ; CHECK-VF8-NEXT:    store <vscale x 4 x float> [[TMP10]], ptr [[TMP6]], align 4
-; CHECK-VF8-NEXT:    store <vscale x 4 x float> [[TMP11]], ptr [[TMP14]], align 4
+; CHECK-VF8-NEXT:    store <vscale x 4 x float> [[TMP11]], ptr [[TMP9]], align 4
 ; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
 ; CHECK-VF8-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-VF8-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
index 4444be36c3567..643ff29136d22 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vscale-based-trip-counts.ll
@@ -76,11 +76,8 @@ define void @vscale_mul_8(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP21]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[TMP17]], ptr [[B]], align 4
-; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP22]], align 4
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_COND_CLEANUP]]:
@@ -216,11 +213,8 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -296,11 +290,8 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[TMP17:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT:    [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP20:%.*]] = shl nuw i64 [[TMP19]], 2
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds nuw float, ptr [[TMP12]], i64 [[TMP20]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[TMP17]], ptr [[TMP12]], align 4
-; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP21]], align 4
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP16]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -378,11 +369,8 @@ define void @trip_count_with_overflow(ptr noalias noundef readonly captures(none
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT:    store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -455,11 +443,8 @@ define void @trip_count_too_big_for_element_count(ptr noalias noundef readonly c
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float>, ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD3]]
-; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP21:%.*]] = shl nuw i64 [[TMP20]], 2
-; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw float, ptr [[TMP13]], i64 [[TMP21]]
 ; CHECK-NEXT:    store <vscale x 4 x float> [[TMP18]], ptr [[TMP13]], align 4
-; CHECK-NEXT:    store <vscale x 4 x float> [[TMP19]], ptr [[TMP22]], align 4
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP19]], ptr [[TMP17]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
index 7a3d81b240394..ce3e734bdd84d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll
@@ -430,11 +430,8 @@ define void @single_stride_int_scaled(ptr %p, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT:    [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-UF2-NEXT:    [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
-; NOSTRIDED-UF2-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
-; NOSTRIDED-UF2-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
 ; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
-; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
+; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; NOSTRIDED-UF2-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NOSTRIDED-UF2-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -557,11 +554,8 @@ define void @single_stride_int_iv(ptr %p, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT:    [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-UF2-NEXT:    [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
-; NOSTRIDED-UF2-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
-; NOSTRIDED-UF2-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
 ; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
-; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
+; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; NOSTRIDED-UF2-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NOSTRIDED-UF2-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -1060,11 +1054,8 @@ define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
 ; NOSTRIDED-UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i32>, ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT:    [[TMP8:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
 ; NOSTRIDED-UF2-NEXT:    [[TMP9:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD1]], splat (i32 1)
-; NOSTRIDED-UF2-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; NOSTRIDED-UF2-NEXT:    [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
-; NOSTRIDED-UF2-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP4]], i64 [[TMP11]]
 ; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP8]], ptr [[TMP4]], align 4
-; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP9]], ptr [[TMP12]], align 4
+; NOSTRIDED-UF2-NEXT:    store <vscale x 4 x i32> [[TMP9]], ptr [[TMP7]], align 4
 ; NOSTRIDED-UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
 ; NOSTRIDED-UF2-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NOSTRIDED-UF2-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index 62d08c8668235..9698c33d8e08c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -36,13 +36,10 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD3]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = fadd <8 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD4]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
 ; CHECK-NEXT:    store <8 x float> [[TMP7]], ptr [[A]], align 4
-; CHECK-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP11]], align 4
-; CHECK-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP12]], align 4
-; CHECK-NEXT:    store <8 x float> [[TMP10]], ptr [[TMP13]], align 4
+; CHECK-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
+; CHECK-NEXT:    store <8 x float> [[TMP10]], ptr [[TMP6]], align 4
 ; CHECK-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll
new file mode 100644
index 0000000000000..49eb8b349a274
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
+
+; Verify that we check the source element type of GEPs when performing CSE.
+
+define void @cse_replicate_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n) {
+; CHECK-LABEL: define void @cse_replicate_gep(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 4
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i32 4
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2
+; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4
+; CHECK-NEXT:    store <4 x i32> [[WIDE_LOAD]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    store <4 x i32> [[WIDE_LOAD1]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i32 4
+; CHECK-NEXT:    store <4 x i16> [[WIDE_LOAD2]], ptr [[TMP5]], align 2
+; CHECK-NEXT:    store <4 x i16> [[WIDE_LOAD3]], ptr [[TMP6]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A.32 = getelementptr i32, ptr %A, i64 %iv
+  %l.32 = load i32, ptr %gep.A.32
+  %gep.A.16 = getelementptr i16, ptr %A, i64 %iv
+  %l.16 = load i16, ptr %gep.A.16
+  %gep.B = getelementptr i32, ptr %B, i64 %iv
+  store i32 %l.32, ptr %gep.B
+  %gep.C = getelementptr i16, ptr %C, i64 %iv
+  store i16 %l.16, ptr %gep.C
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exit.cond = icmp eq i64 %iv.next, %n
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n) {
+; CHECK-LABEL: define void @cse_wide_gep(
+; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[A]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[A]], <4 x i64> [[STEP_ADD]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[STEP_ADD]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i32 4
+; CHECK-NEXT:    store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    store <4 x ptr> [[TMP1]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 4
+; CHECK-NEXT:    store <4 x ptr> [[TMP2]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    store <4 x ptr> [[TMP3]], ptr [[TMP8]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.A.32 = getelementptr i32, ptr %A, i64 %iv
+  %gep.A.16 = getelementptr i16, ptr %A, i64 %iv
+  %gep.B = getelementptr i64, ptr %B, i64 %iv
+  store ptr %gep.A.32, ptr %gep.B
+  %gep.C = getelementptr i64, ptr %C, i64 %iv
+  store ptr %gep.A.16, ptr %gep.C
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exit.cond = icmp eq i64 %iv.next, %n
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
index 38dbbbb21583a..a9118da233e33 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
@@ -60,9 +60,8 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
 ; VF8UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; VF8UF2-NEXT:    [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
 ; VF8UF2-NEXT:    [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i32 8
 ; VF8UF2-NEXT:    store <8 x i8> [[TMP3]], ptr [[A]], align 1
-; VF8UF2-NEXT:    store <8 x i8> [[TMP4]], ptr [[TMP6]], align 1
+; VF8UF2-NEXT:    store <8 x i8> [[TMP4]], ptr [[TMP2]], align 1
 ; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; VF8UF2:       [[MIDDLE_BLOCK]]:
 ; VF8UF2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index 27fa57928aa96..5329d9b42befa 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -66,9 +66,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
 ; VF8UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
 ; VF8UF2-NEXT:    [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
 ; VF8UF2-NEXT:    [[TMP5:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
-; VF8UF2-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 8
 ; VF8UF2-NEXT:    store <8 x i8> [[TMP4]], ptr [[A]], align 1
-; VF8UF2-NEXT:    store <8 x i8> [[TMP5]], ptr [[TMP7]], align 1
+; VF8UF2-NEXT:    store <8 x i8> [[TMP5]], ptr [[TMP3]], align 1
 ; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
 ; VF8UF2:       [[MIDDLE_BLOCK]]:
 ; VF8UF2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
index 2a7ffec27c2f9..12c5950d3a171 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-before-execute.ll
@@ -82,9 +82,8 @@ define void @test_tc_less_than_16(ptr %A, i64 %N) {
 ; CHECK-NEXT:    WIDEN ir<%l>.1 = load vp<[[VPTR2]]>
 ; CHECK-NEXT:    WIDEN ir<%add> = add nsw ir<%l>, ir<10>
 ; CHECK-NEXT:    WIDEN ir<%add>.1 = add nsw ir<%l>.1, ir<10>
-; CHECK-NEXT:    vp<[[VPTR4:%.+]]> = vector-pointer ir<%A>, ir<1>
 ; CHECK-NEXT:    WIDEN store ir<%A>, ir<%add>
-; CHECK-NEXT:    WIDEN store vp<[[VPTR4]]>, ir<%add>.1
+; CHECK-NEXT:    WIDEN store vp<[[VPTR2]]>, ir<%add>.1
 ; CHECK-NEXT:    Successor(s): middle.block
 ; CHECK-EMPTY:
 ; CHECK-NEXT:    middle.block:
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
index e74bf592e1525..670c2d9108d4e 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -168,9 +168,8 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x float> [[TMP7]], [[WIDE_LOAD11]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[TMP6]], <4 x float> [[TMP10]]
 ; CHECK-NEXT:    [[PREDPHI12:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP11]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]]
-; CHECK-NEXT:    store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias [[META11]]
+; CHECK-NEXT:    store <4 x float> [[PREDPHI12]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
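
A minimal illustration of the invariant the patch enforces (a sketch in plain LLVM IR, not taken from the patch; %A and the value names are hypothetical): two GEPs with identical operands but different source element types scale their index by different amounts, so treating them as equal would be a miscompile.

  ; Identical operands, different source element types:
  %g32 = getelementptr i32, ptr %A, i64 1   ; %A plus 4 bytes
  %g16 = getelementptr i16, ptr %A, i64 1   ; %A plus 2 bytes

With getGEPSourceElementType folded into both hashing and equality in VPCSEDenseMapInfo, %g32 and %g16 hash and compare as distinct, which is what the new cse-gep-source-element-type.ll test exercises.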