diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll index 587fca649a40f..8ba03b58d5069 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll @@ -1,28 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" define void @foo(i64* %ptr, i32* %ptr.2) { ; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR_21:%.*]] = bitcast i32* [[PTR_2:%.*]] to i8* +; CHECK-NEXT: [[PTR3:%.*]] = bitcast i64* [[PTR:%.*]] to i8* +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[PTR_2]], i64 1 +; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i64, i64* [[PTR]], i64 80 +; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i64* [[SCEVGEP4]] to i8* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[PTR_21]], [[SCEVGEP45]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[PTR3]], [[SCEVGEP2]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]] -; CHECK-NEXT: [[TRUNC:%.+]] = trunc i64 [[OFFSET_IDX]] to i32 -; CHECK-NEXT: [[TMP7:%.*]] = add i32 
[[TRUNC]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TRUNC]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TRUNC]], 2 -; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TRUNC]], 3 -; CHECK-NEXT: = add i64 [[INDEX]], 0 -; CHECK-NEXT: store i32 [[TMP7]], i32* %ptr.2, align 4 -; CHECK-NEXT: store i32 [[TMP8]], i32* %ptr.2, align 4 -; CHECK-NEXT: store i32 [[TMP9]], i32* %ptr.2, align 4 -; CHECK-NEXT: store i32 [[TMP10]], i32* %ptr.2, align 4 -; CHECK: store <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[OFFSET_IDX]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: store i32 [[TMP1]], i32* [[PTR_2]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: store i32 [[TMP2]], i32* [[PTR_2]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: store i32 [[TMP3]], i32* [[PTR_2]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: store i32 [[TMP4]], i32* [[PTR_2]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <4 x i64>* +; CHECK-NEXT: store <4 x i64> [[VEC_IND]], <4 x i64>* [[TMP8]], align 8, !alias.scope !3 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80 +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 80, 80 +; CHECK-NEXT: br i1 [[CMP_N]], 
label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 80, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 82, [[MIDDLE_BLOCK]] ], [ 2, [[ENTRY]] ], [ 2, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.scevcheck: +; CHECK-NEXT: unreachable +; CHECK: loop: +; CHECK-NEXT: [[CAN_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[CAN_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 4294967295 +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP10]] to i32 +; CHECK-NEXT: store i32 [[TMP12]], i32* [[PTR_2]], align 4 +; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[CAN_IV]] +; CHECK-NEXT: store i64 [[TMP10]], i64* [[GEP_PTR]], align 8 +; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], 80 +; CHECK-NEXT: [[CAN_IV_NEXT]] = add nuw nsw i64 [[CAN_IV]], 1 +; CHECK-NEXT: br i1 [[TMP14]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %loop