diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index fd4b962a292d9..c308a37ab9085 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -14,24 +14,17 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; ; ; -define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) { +define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) { ; CHECK-LABEL: @recurrence_1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64 -; CHECK-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 ; CHECK-NEXT: br label [[FOR_PREHEADER:%.*]] ; CHECK: for.preheader: -; CHECK-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4 ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[A2]], 4 -; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[TMP3]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 16 -; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3 @@ -39,34 +32,34 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[SCALAR_BODY:%.*]] ; CHECK: scalar.body: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[SCALAR_BODY]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP13]] = load i32, i32* [[ARRAYIDX32]], align 4 +; CHECK-NEXT: [[TMP11]] = load i32, i32* [[ARRAYIDX32]], align 4 ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[ADD35:%.*]] = add i32 [[TMP13]], [[SCALAR_RECUR]] +; CHECK-NEXT: [[ADD35:%.*]] = add i32 [[TMP11]], [[SCALAR_RECUR]] ; CHECK-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -76,64 +69,57 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; ; UNROLL-LABEL: @recurrence_1( ; UNROLL-NEXT: entry: -; UNROLL-NEXT: [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64 -; UNROLL-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 ; UNROLL-NEXT: br label [[FOR_PREHEADER:%.*]] ; UNROLL: for.preheader: -; UNROLL-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[A]], align 4 +; UNROLL-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4 ; UNROLL-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; UNROLL-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; UNROLL-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL: vector.memcheck: -; UNROLL-NEXT: [[TMP3:%.*]] = add i64 [[A2]], 4 -; UNROLL-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[TMP3]] -; UNROLL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 32 -; UNROLL-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584 ; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] +; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 +; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] +; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 +; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 4 ; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 -; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP6]], i64 4 -; UNROLL-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* -; UNROLL-NEXT: [[WIDE_LOAD3]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 -; UNROLL-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD3]], <4 x i32> -; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; UNROLL-NEXT: [[TMP13:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP10]] -; UNROLL-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD3]], [[TMP11]] -; UNROLL-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP15]], align 4 -; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i64 4 -; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4 +; UNROLL-NEXT: [[WIDE_LOAD1]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; UNROLL-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]] +; UNROLL-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP9]] +; UNROLL-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP13]], align 4 +; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 4 +; UNROLL-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP15]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; UNROLL-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 3 +; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i64 3 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL: scalar.body: -; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NEXT: [[TMP19]] = load i32, i32* [[ARRAYIDX32]], align 4 +; UNROLL-NEXT: [[TMP17]] = load i32, i32* [[ARRAYIDX32]], align 4 ; UNROLL-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; UNROLL-NEXT: [[ADD35:%.*]] = add i32 [[TMP19]], [[SCALAR_RECUR]] +; UNROLL-NEXT: [[ADD35:%.*]] = add i32 [[TMP17]], [[SCALAR_RECUR]] ; UNROLL-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -143,22 +129,15 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; ; UNROLL-NO-IC-LABEL: @recurrence_1( ; UNROLL-NO-IC-NEXT: entry: -; UNROLL-NO-IC-NEXT: [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64 -; UNROLL-NO-IC-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 ; UNROLL-NO-IC-NEXT: br label [[FOR_PREHEADER:%.*]] ; UNROLL-NO-IC: for.preheader: -; UNROLL-NO-IC-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 0 +; UNROLL-NO-IC-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0 ; UNROLL-NO-IC-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-IC: vector.memcheck: -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[A2]], 4 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[TMP3]] -; UNROLL-NO-IC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 32 -; UNROLL-NO-IC-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] @@ -166,51 +145,51 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD3:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP5]], 1 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP6]], 1 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 1 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD3]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD3]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP5]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP15]] -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = add <4 x i32> [[WIDE_LOAD3]], [[TMP16]] -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]] +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP13]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP14]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP20]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 4 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP22]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP24]], align 4 +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i32 3 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[TMP26]] = load i32, i32* [[ARRAYIDX32]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP24]] = load i32, i32* [[ARRAYIDX32]], align 4 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP26]], [[SCALAR_RECUR]] +; UNROLL-NO-IC-NEXT: [[ADD35:%.*]] = add i32 [[TMP24]], [[SCALAR_RECUR]] ; UNROLL-NO-IC-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -220,61 +199,54 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; ; UNROLL-NO-VF-LABEL: @recurrence_1( ; UNROLL-NO-VF-NEXT: entry: -; UNROLL-NO-VF-NEXT: [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64 -; UNROLL-NO-VF-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 ; UNROLL-NO-VF-NEXT: br label [[FOR_PREHEADER:%.*]] ; UNROLL-NO-VF: for.preheader: -; UNROLL-NO-VF-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 0 +; UNROLL-NO-VF-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0 ; UNROLL-NO-VF-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-VF: vector.memcheck: -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i64 [[A2]], 4 -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[TMP3]] -; UNROLL-NO-VF-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 8 -; UNROLL-NO-VF-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[INDUCTION3]], 1 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP10]] = load i32, i32* [[TMP8]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION3]] -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = add i32 [[TMP9]], [[VECTOR_RECUR]] -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = add i32 [[TMP10]], [[TMP9]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], i32* [[TMP11]], align 4 -; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP12]], align 4 +; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1 +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP8]] = load i32, i32* [[TMP6]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]] +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = add i32 [[TMP7]], [[VECTOR_RECUR]] +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = add i32 [[TMP8]], [[TMP7]] +; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP9]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[TMP16]] = load i32, i32* [[ARRAYIDX32]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP14]] = load i32, i32* [[ARRAYIDX32]], align 4 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP16]], [[SCALAR_RECUR]] +; UNROLL-NO-VF-NEXT: [[ADD35:%.*]] = add i32 [[TMP14]], [[SCALAR_RECUR]] ; UNROLL-NO-VF-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -284,22 +256,15 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; ; SINK-AFTER-LABEL: @recurrence_1( ; SINK-AFTER-NEXT: entry: -; SINK-AFTER-NEXT: [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64 -; SINK-AFTER-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 ; SINK-AFTER-NEXT: br label [[FOR_PREHEADER:%.*]] ; SINK-AFTER: for.preheader: -; SINK-AFTER-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 0 +; SINK-AFTER-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0 ; SINK-AFTER-NEXT: [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 ; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 -; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; SINK-AFTER: vector.memcheck: -; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[A2]], 4 -; SINK-AFTER-NEXT: [[TMP4:%.*]] = sub i64 [[B1]], [[TMP3]] -; SINK-AFTER-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP4]], 16 -; SINK-AFTER-NEXT: br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] @@ -308,38 +273,38 @@ define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 -; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] -; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0 -; SINK-AFTER-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 -; SINK-AFTER-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP5]] -; SINK-AFTER-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP10]] -; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 -; SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP14]], align 4 +; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] +; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 +; SINK-AFTER-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; SINK-AFTER-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]] +; SINK-AFTER-NEXT: [[TMP10:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]] +; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 +; SINK-AFTER-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; SINK-AFTER-NEXT: [[TMP16]] = load i32, i32* [[ARRAYIDX32]], align 4 +; SINK-AFTER-NEXT: [[TMP14]] = load i32, i32* [[ARRAYIDX32]], align 4 ; SINK-AFTER-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP16]], [[SCALAR_RECUR]] +; SINK-AFTER-NEXT: [[ADD35:%.*]] = add i32 [[TMP14]], [[SCALAR_RECUR]] ; SINK-AFTER-NEXT: store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] @@ -411,7 +376,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; CHECK-NEXT: [[TMP8]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] @@ -440,7 +405,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; ; UNROLL-LABEL: @recurrence_2( ; UNROLL-NEXT: entry: @@ -479,7 +444,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NEXT: [[TMP14]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP12]]) ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP13]], <4 x i32> [[TMP14]]) ; UNROLL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP16]]) @@ -509,7 +474,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @recurrence_2( ; UNROLL-NO-IC-NEXT: entry: @@ -557,7 +522,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP22]] = select <4 x i1> [[TMP20]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP18]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP21]], [[TMP22]] ; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP21]], <4 x i32> [[TMP22]] @@ -591,7 +556,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @recurrence_2( ; UNROLL-NO-VF-NEXT: entry: @@ -699,7 +664,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[TMP12]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP10]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP12]]) ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] @@ -731,7 +696,7 @@ define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; entry: %cmp27 = icmp sgt i32 %n, 0 @@ -776,7 +741,7 @@ scalar.body: ; Check also that the casts were not moved needlessly. ; ; -define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 %n, float %f, i16 %p) { +define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n, float %f, i16 %p) { ; CHECK-LABEL: @recurrence_3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2 @@ -793,21 +758,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2 -; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]] -; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[SCEVGEP6]] to double* -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast double* [[SCEVGEP2]] to i16* -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP4]], [[TMP8]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588 ; CHECK-NEXT: [[IND_END:%.*]] = or i64 [[N_VEC]], 1 @@ -819,34 +770,34 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !6 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> -; CHECK-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double> -; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8, !alias.scope !9, !noalias !6 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> +; CHECK-NEXT: [[TMP8:%.*]] = sitofp <4 x i16> [[TMP6]] to <4 x double> +; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = fsub fast <4 x double> [[TMP7]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP10]], <4 x double>* [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[SCALAR_BODY:%.*]] ; CHECK: scalar.body: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]] -; CHECK-NEXT: [[TMP19]] = load i16, i16* [[ARRAYIDX5]], align 2 -; CHECK-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP19]] to double +; CHECK-NEXT: [[TMP14]] = load i16, i16* [[ARRAYIDX5]], align 2 +; CHECK-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP14]] to double ; CHECK-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double ; CHECK-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; CHECK-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] @@ -855,7 +806,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; CHECK-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: @@ -877,73 +828,59 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; UNROLL-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL: vector.memcheck: -; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1 -; UNROLL-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2 -; UNROLL-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; UNROLL-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2 -; UNROLL-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]] -; UNROLL-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]] -; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[SCEVGEP6]] to double* -; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[TMP7]] -; UNROLL-NEXT: [[TMP8:%.*]] = bitcast double* [[SCEVGEP2]] to i16* -; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP4]], [[TMP8]] -; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584 ; UNROLL-NEXT: [[IND_END:%.*]] = or i64 [[N_VEC]], 1 ; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 3 ; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0 ; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer -; UNROLL-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0 -; UNROLL-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer +; UNROLL-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0 +; UNROLL-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD8:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]] -; UNROLL-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !6 -; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i64 4 -; UNROLL-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2, !alias.scope !6 -; UNROLL-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NEXT: [[TMP14:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> -; UNROLL-NEXT: [[TMP15:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> -; UNROLL-NEXT: [[TMP16:%.*]] = sitofp <4 x i16> [[WIDE_LOAD8]] to <4 x double> -; UNROLL-NEXT: [[TMP17:%.*]] = sitofp <4 x i16> [[TMP13]] to <4 x double> -; UNROLL-NEXT: [[TMP18:%.*]] = sitofp <4 x i16> [[TMP14]] to <4 x double> -; UNROLL-NEXT: [[TMP19:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP17]] -; UNROLL-NEXT: [[TMP20:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT10]], [[TMP18]] -; UNROLL-NEXT: [[TMP21:%.*]] = fsub fast <4 x double> [[TMP15]], [[TMP19]] -; UNROLL-NEXT: [[TMP22:%.*]] = fsub fast <4 x double> [[TMP16]], [[TMP20]] -; UNROLL-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]] -; UNROLL-NEXT: [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>* -; UNROLL-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[TMP24]], align 8, !alias.scope !9, !noalias !6 -; UNROLL-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 4 -; UNROLL-NEXT: [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>* -; UNROLL-NEXT: store <4 x double> [[TMP22]], <4 x double>* [[TMP26]], align 8, !alias.scope !9, !noalias !6 +; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]] +; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2 +; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4 +; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* +; UNROLL-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 +; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NEXT: [[TMP10:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> +; UNROLL-NEXT: [[TMP11:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double> +; UNROLL-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[TMP8]] to <4 x double> +; UNROLL-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP9]] to <4 x double> +; UNROLL-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP12]] +; UNROLL-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT3]], [[TMP13]] +; UNROLL-NEXT: [[TMP16:%.*]] = fsub fast <4 x double> [[TMP10]], [[TMP14]] +; UNROLL-NEXT: [[TMP17:%.*]] = fsub fast <4 x double> [[TMP11]], [[TMP15]] +; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]] +; UNROLL-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>* +; UNROLL-NEXT: store <4 x double> [[TMP16]], <4 x double>* [[TMP19]], align 8 +; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 4 +; UNROLL-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>* +; UNROLL-NEXT: store <4 x double> [[TMP17]], <4 x double>* [[TMP21]], align 8 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] -; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i64 3 +; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL: scalar.body: -; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]] -; UNROLL-NEXT: [[TMP28]] = load i16, i16* [[ARRAYIDX5]], align 2 -; UNROLL-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP28]] to double +; UNROLL-NEXT: [[TMP23]] = load i16, i16* [[ARRAYIDX5]], align 2 +; UNROLL-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP23]] to double ; UNROLL-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double ; UNROLL-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; UNROLL-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] @@ -952,7 +889,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; UNROLL-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; UNROLL: for.end.loopexit: ; UNROLL-NEXT: br label [[FOR_END]] ; UNROLL: for.end: @@ -974,23 +911,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-IC: vector.memcheck: -; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1 -; UNROLL-NO-IC-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8* -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2 -; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]] -; UNROLL-NO-IC-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8* -; UNROLL-NO-IC-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NO-IC-NEXT: [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8* -; UNROLL-NO-IC-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]] -; UNROLL-NO-IC-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8* -; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]] -; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]] -; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] @@ -998,59 +919,59 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0 -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer +; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0 +; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD8:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP8]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2, !alias.scope !6 -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = bitcast i16* [[TMP13]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP14]], align 2, !alias.scope !6 -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sitofp <4 x i16> [[WIDE_LOAD8]] to <4 x double> -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = sitofp <4 x i16> [[TMP15]] to <4 x double> -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = sitofp <4 x i16> [[TMP16]] to <4 x double> -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = fmul fast <4 x double> [[TMP19]], [[BROADCAST_SPLAT]] -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = fmul fast <4 x double> [[TMP20]], [[BROADCAST_SPLAT10]] -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = fsub fast <4 x double> [[TMP17]], [[TMP21]] -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = fsub fast <4 x double> [[TMP18]], [[TMP22]] -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]] -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP8]] -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = bitcast double* [[TMP27]] to <4 x double>* -; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP23]], <4 x double>* [[TMP28]], align 8, !alias.scope !9, !noalias !6 -; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* -; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP24]], <4 x double>* [[TMP30]], align 8, !alias.scope !9, !noalias !6 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP5]] +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to <4 x i16>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP11]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double> +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = sitofp <4 x i16> [[TMP12]] to <4 x double> +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sitofp <4 x i16> [[TMP13]] to <4 x double> +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = fmul fast <4 x double> [[TMP16]], [[BROADCAST_SPLAT]] +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = fmul fast <4 x double> [[TMP17]], [[BROADCAST_SPLAT3]] +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = fsub fast <4 x double> [[TMP14]], [[TMP18]] +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = fsub fast <4 x double> [[TMP15]], [[TMP19]] +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP5]] +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>* +; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[TMP25]], align 8 +; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>* +; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP21]], <4 x double>* [[TMP27]], align 8 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i32 3 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; UNROLL-NO-IC-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-IC: scalar.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP32:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP29:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]] -; UNROLL-NO-IC-NEXT: [[TMP32]] = load i16, i16* [[ARRAYIDX5]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP32]] to double +; UNROLL-NO-IC-NEXT: [[TMP29]] = load i16, i16* [[ARRAYIDX5]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP29]] to double ; UNROLL-NO-IC-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double ; UNROLL-NO-IC-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; UNROLL-NO-IC-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] @@ -1059,7 +980,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-IC-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; UNROLL-NO-IC: for.end.loopexit: ; UNROLL-NO-IC-NEXT: br label [[FOR_END]] ; UNROLL-NO-IC: for.end: @@ -1081,23 +1002,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-VF: vector.memcheck: -; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1 -; UNROLL-NO-VF-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8* -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2 -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2 -; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]] -; UNROLL-NO-VF-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8* -; UNROLL-NO-VF-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NO-VF-NEXT: [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8* -; UNROLL-NO-VF-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]] -; UNROLL-NO-VF-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8* -; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]] -; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]] -; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NO-VF-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] @@ -1105,42 +1010,42 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION8:%.*]] = add i64 [[OFFSET_IDX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION8]] -; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 2, !alias.scope !5 -; UNROLL-NO-VF-NEXT: [[TMP10]] = load i16, i16* [[TMP8]], align 2, !alias.scope !5 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sitofp i16 [[TMP9]] to double -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sitofp i16 [[TMP10]] to double -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = sitofp i16 [[TMP9]] to double -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = fmul fast double [[TMP13]], [[CONV1]] -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = fmul fast double [[TMP14]], [[CONV1]] -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = fsub fast double [[TMP11]], [[TMP15]] -; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = fsub fast double [[TMP12]], [[TMP16]] -; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION8]] -; UNROLL-NO-VF-NEXT: store double [[TMP17]], double* [[TMP19]], align 8, !alias.scope !8, !noalias !5 -; UNROLL-NO-VF-NEXT: store double [[TMP18]], double* [[TMP20]], align 8, !alias.scope !8, !noalias !5 +; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION1]] +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = load i16, i16* [[TMP4]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP7]] = load i16, i16* [[TMP5]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sitofp i16 [[TMP6]] to double +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sitofp i16 [[TMP7]] to double +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sitofp i16 [[TMP6]] to double +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = fmul fast double [[TMP10]], [[CONV1]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = fmul fast double [[TMP11]], [[CONV1]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = fsub fast double [[TMP8]], [[TMP12]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = fsub fast double [[TMP9]], [[TMP13]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION1]] +; UNROLL-NO-VF-NEXT: store double [[TMP14]], double* [[TMP16]], align 8 +; UNROLL-NO-VF-NEXT: store double [[TMP15]], double* [[TMP17]], align 8 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL-NO-VF: scalar.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[SCALAR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]] -; UNROLL-NO-VF-NEXT: [[TMP22]] = load i16, i16* [[ARRAYIDX5]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP22]] to double +; UNROLL-NO-VF-NEXT: [[TMP19]] = load i16, i16* [[ARRAYIDX5]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP19]] to double ; UNROLL-NO-VF-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double ; UNROLL-NO-VF-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; UNROLL-NO-VF-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] @@ -1149,7 +1054,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; UNROLL-NO-VF-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL-NO-VF: for.end.loopexit: ; UNROLL-NO-VF-NEXT: br label [[FOR_END]] ; UNROLL-NO-VF: for.end: @@ -1171,23 +1076,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; SINK-AFTER-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 -; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; SINK-AFTER: vector.memcheck: -; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1 -; SINK-AFTER-NEXT: [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8* -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i32 [[N]], -2 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2 -; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]] -; SINK-AFTER-NEXT: [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8* -; SINK-AFTER-NEXT: [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; SINK-AFTER-NEXT: [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8* -; SINK-AFTER-NEXT: [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]] -; SINK-AFTER-NEXT: [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8* -; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]] -; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]] -; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] @@ -1200,38 +1089,38 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] -; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 -; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]] -; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0 -; SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !6 -; SINK-AFTER-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> -; SINK-AFTER-NEXT: [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double> -; SINK-AFTER-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[TMP13]], [[BROADCAST_SPLAT]] -; SINK-AFTER-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]] -; SINK-AFTER-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]] -; SINK-AFTER-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 0 -; SINK-AFTER-NEXT: [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>* -; SINK-AFTER-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP18]], align 8, !alias.scope !9, !noalias !6 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]] +; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; SINK-AFTER-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 +; SINK-AFTER-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP9:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double> +; SINK-AFTER-NEXT: [[TMP10:%.*]] = sitofp <4 x i16> [[TMP8]] to <4 x double> +; SINK-AFTER-NEXT: [[TMP11:%.*]] = fmul fast <4 x double> [[TMP10]], [[BROADCAST_SPLAT]] +; SINK-AFTER-NEXT: [[TMP12:%.*]] = fsub fast <4 x double> [[TMP9]], [[TMP11]] +; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]] +; SINK-AFTER-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, double* [[TMP13]], i32 0 +; SINK-AFTER-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>* +; SINK-AFTER-NEXT: store <4 x double> [[TMP12]], <4 x double>* [[TMP15]], align 8 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ] ; SINK-AFTER-NEXT: br label [[SCALAR_BODY:%.*]] ; SINK-AFTER: scalar.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ] ; SINK-AFTER-NEXT: [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]] -; SINK-AFTER-NEXT: [[TMP20]] = load i16, i16* [[ARRAYIDX5]], align 2 -; SINK-AFTER-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP20]] to double +; SINK-AFTER-NEXT: [[TMP17]] = load i16, i16* [[ARRAYIDX5]], align 2 +; SINK-AFTER-NEXT: [[CONV6:%.*]] = sitofp i16 [[TMP17]] to double ; SINK-AFTER-NEXT: [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double ; SINK-AFTER-NEXT: [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]] ; SINK-AFTER-NEXT: [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]] @@ -1240,7 +1129,7 @@ define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 ; SINK-AFTER-NEXT: [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1 ; SINK-AFTER-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SINK-AFTER: for.end.loopexit: ; SINK-AFTER-NEXT: br label [[FOR_END]] ; SINK-AFTER: for.end: @@ -1641,7 +1530,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1652,7 +1541,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -1671,7 +1560,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1682,7 +1571,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ] ; UNROLL-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2 ; UNROLL-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; UNROLL-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -1747,7 +1636,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3 @@ -1764,7 +1653,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]] ; UNROLL-NO-IC-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -1794,7 +1683,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[TMP8]] = load i32, i32* [[TMP6]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1809,7 +1698,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; UNROLL-NO-VF-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]] ; UNROLL-NO-VF-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -1853,7 +1742,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 @@ -1870,7 +1759,7 @@ define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) { ; SINK-AFTER-NEXT: [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]] ; SINK-AFTER-NEXT: [[VAR2]] = load i32, i32* [[VAR1]], align 4 ; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -1898,13 +1787,13 @@ define void @constant_folded_previous_value() { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[SCALAR_BODY:%.*]] ; CHECK: scalar.body: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -1914,13 +1803,13 @@ define void @constant_folded_previous_value() { ; UNROLL: vector.ph: ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: -; UNROLL-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[SCALAR_BODY:%.*]] ; UNROLL: scalar.body: -; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -1935,7 +1824,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1949,7 +1838,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-IC-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-IC-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-IC-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef -; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -1965,7 +1854,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-VF-NEXT: [[TMP1]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1979,7 +1868,7 @@ define void @constant_folded_previous_value() { ; UNROLL-NO-VF-NEXT: [[VAR3]] = add i64 0, 1 ; UNROLL-NO-VF-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; UNROLL-NO-VF-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef -; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -1994,7 +1883,7 @@ define void @constant_folded_previous_value() { ; SINK-AFTER-NEXT: [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> , <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef -; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 undef, undef ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -2008,7 +1897,7 @@ define void @constant_folded_previous_value() { ; SINK-AFTER-NEXT: [[VAR3]] = add i64 0, 1 ; SINK-AFTER-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; SINK-AFTER-NEXT: [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef -; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -2045,14 +1934,14 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1]] = add i32 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP0]], [[X:%.*]] ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2068,7 +1957,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP1]] = add i32 [[TMP0]], 8 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 4 ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP3]], [[X:%.*]] @@ -2076,7 +1965,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2102,7 +1991,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 @@ -2119,7 +2008,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-IC-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; UNROLL-NO-IC-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; UNROLL-NO-IC-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 -; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-IC-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2138,7 +2027,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-VF-NEXT: [[TMP1]] = add i32 [[INDUCTION1]], [[X]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96 ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -2153,7 +2042,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; UNROLL-NO-VF-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; UNROLL-NO-VF-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; UNROLL-NO-VF-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 -; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ] ; UNROLL-NO-VF-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2174,7 +2063,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 -; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 96, 96 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 @@ -2191,7 +2080,7 @@ define i32 @extract_second_last_iteration(i32* %cval, i32 %x) { ; SINK-AFTER-NEXT: [[BC:%.*]] = zext i32 [[INC_PHI]] to i64 ; SINK-AFTER-NEXT: [[ADDX]] = add i32 [[INC_PHI]], [[X]] ; SINK-AFTER-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95 -; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] ; SINK-AFTER-NEXT: ret i32 [[VAL_PHI_LCSSA]] @@ -2272,7 +2161,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; CHECK-NEXT: [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP23]]) ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -2282,7 +2171,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; CHECK-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[A_1_LCSSA]] ; CHECK: for.body: -; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; ; UNROLL-LABEL: @PR33613( ; UNROLL-NEXT: entry: @@ -2355,7 +2244,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NEXT: [[TMP48]] = add <4 x i32> [[VEC_PHI9]], [[TMP46]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP48]], [[TMP47]] ; UNROLL-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -2366,7 +2255,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NEXT: [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[A_1_LCSSA]] ; UNROLL: for.body: -; UNROLL-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @PR33613( ; UNROLL-NO-IC-NEXT: entry: @@ -2441,7 +2330,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[TMP49]] = add <4 x i32> [[VEC_PHI9]], [[TMP47]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -2472,7 +2361,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1 ; UNROLL-NO-IC-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240 -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @PR33613( ; UNROLL-NO-VF-NEXT: entry: @@ -2506,7 +2395,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: [[TMP15]] = add i32 [[VEC_PHI3]], [[TMP13]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]] ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240 @@ -2534,7 +2423,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; UNROLL-NO-VF-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1 ; UNROLL-NO-VF-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240 -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; ; SINK-AFTER-LABEL: @PR33613( ; SINK-AFTER-NEXT: entry: @@ -2579,7 +2468,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240 -; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 10240, 10240 @@ -2609,7 +2498,7 @@ define i32 @PR33613(double* %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[INC1]] = add nuw nsw i32 [[I_011]], 1 ; SINK-AFTER-NEXT: [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240 -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; entry: %idxprom = sext i32 %d to i64 @@ -2643,23 +2532,12 @@ for.body: ; ; Check that the sext sank after the load in the vector loop. ; -define void @sink_after(i16* %a, i32* %b, i64 %n) { +define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) { ; CHECK-LABEL: @sink_after( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]] -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32* -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16* -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 @@ -2667,41 +2545,41 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !21 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP11]], align 4, !alias.scope !24, !noalias !21 +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP13]] = load i16, i16* [[ARRAYIDX2]], align 2 -; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP10]] = load i16, i16* [[ARRAYIDX2]], align 2 +; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -2709,91 +2587,67 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: entry: ; UNROLL-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL: vector.memcheck: -; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]] -; UNROLL-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; UNROLL-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32* -; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]] -; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16* -; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]] -; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 ; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] -; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !21 -; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4 -; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2, !alias.scope !21 -; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> -; UNROLL-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32> -; UNROLL-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32> -; UNROLL-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; UNROLL-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32> -; UNROLL-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP10]] -; UNROLL-NEXT: [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]] -; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; UNROLL-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4, !alias.scope !24, !noalias !21 -; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i64 4 -; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP19]], align 4, !alias.scope !24, !noalias !21 +; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]] +; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>* +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2 +; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i64 4 +; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>* +; UNROLL-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2 +; UNROLL-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> +; UNROLL-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> +; UNROLL-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; UNROLL-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> +; UNROLL-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP7]] +; UNROLL-NEXT: [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP8]] +; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; UNROLL-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP14]], align 4 +; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 4 +; UNROLL-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP12]], <4 x i32>* [[TMP16]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; UNROLL-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] -; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3 +; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NEXT: [[TMP21]] = load i16, i16* [[ARRAYIDX2]], align 2 -; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP21]] to i32 +; UNROLL-NEXT: [[TMP18]] = load i16, i16* [[ARRAYIDX2]], align 2 +; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32 ; UNROLL-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; ; UNROLL-NO-IC-LABEL: @sink_after( ; UNROLL-NO-IC-NEXT: entry: -; UNROLL-NO-IC-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-IC: vector.memcheck: -; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]] -; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; UNROLL-NO-IC-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NO-IC-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8* -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; UNROLL-NO-IC-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* -; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]] -; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]] -; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -2801,150 +2655,124 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2, !alias.scope !21 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !21 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[TMP10]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP14]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP13]] -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> [[TMP16]], [[TMP14]] +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4, !alias.scope !24, !noalias !21 -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4, !alias.scope !24, !noalias !21 +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP21]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP23]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[TMP26]] = load i16, i16* [[ARRAYIDX2]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP26]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP25]] = load i16, i16* [[ARRAYIDX2]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP25]] to i32 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; ; UNROLL-NO-VF-LABEL: @sink_after( ; UNROLL-NO-VF-NEXT: entry: -; UNROLL-NO-VF-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-VF: vector.memcheck: -; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]] -; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; UNROLL-NO-VF-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NO-VF-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8* -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; UNROLL-NO-VF-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; UNROLL-NO-VF-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* -; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]] -; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]] -; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NO-VF-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1 +; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2, !alias.scope !20 -; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* [[TMP4]], align 2, !alias.scope !20 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP5]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP5]] = load i16, i16* [[TMP3]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[TMP4]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP4]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP5]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[TMP6]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]] ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]] -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]] -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4, !alias.scope !23, !noalias !20 -; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], i32* [[TMP14]], align 4, !alias.scope !23, !noalias !20 +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]] +; UNROLL-NO-VF-NEXT: store i32 [[TMP10]], i32* [[TMP12]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[TMP16]] = load i16, i16* [[ARRAYIDX2]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP16]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP15]] = load i16, i16* [[ARRAYIDX2]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP15]] to i32 ; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; ; SINK-AFTER-LABEL: @sink_after( ; SINK-AFTER-NEXT: entry: -; SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; SINK-AFTER: vector.memcheck: -; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]] -; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8* -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* -; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]] -; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]] -; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -2953,45 +2781,45 @@ define void @sink_after(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] -; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !21 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]] -; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 -; SINK-AFTER-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4, !alias.scope !24, !noalias !21 +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]] +; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>* +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP6:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP6]] +; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0 +; SINK-AFTER-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; SINK-AFTER-NEXT: [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2 -; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +; SINK-AFTER-NEXT: [[TMP13]] = load i16, i16* [[ARRAYIDX2]], align 2 +; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32 ; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -3033,84 +2861,70 @@ for.end: ; ; Check that the sext sank after the load in the vector loop. ; -define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { +define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i64 %n) { ; CHECK-LABEL: @PR34711( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; CHECK-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 -; CHECK-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A]] to i64 -; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[C2]] -; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[A3]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[C2]] -; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 16 -; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[TMP1]] -; CHECK-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP3]], 16 -; CHECK-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] -; CHECK-NEXT: br i1 [[CONFLICT_RDX6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP8]], align 2 -; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP9]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP10]], align 2 -; CHECK-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP11]], align 2 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i64 1 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> [[TMP18]], i16 [[TMP15]], i64 2 -; CHECK-NEXT: [[TMP20]] = insertelement <4 x i16> [[TMP19]], i16 [[TMP16]], i64 3 -; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP19]], <4 x i32> -; CHECK-NEXT: [[TMP22:%.*]] = sext <4 x i16> [[TMP21]] to <4 x i32> -; CHECK-NEXT: [[TMP23:%.*]] = sext <4 x i16> [[TMP20]] to <4 x i32> -; CHECK-NEXT: [[TMP24:%.*]] = mul nsw <4 x i32> [[TMP23]], [[TMP22]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP26]], align 4 +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> , <4 x i32>* [[TMP8]], align 4 +; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[TMP4]], align 2 +; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[TMP5]], align 2 +; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP6]], align 2 +; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP7]], align 2 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i16> poison, i16 [[TMP9]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> [[TMP13]], i16 [[TMP10]], i64 1 +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> [[TMP14]], i16 [[TMP11]], i64 2 +; CHECK-NEXT: [[TMP16]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i64 3 +; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP15]], <4 x i32> +; CHECK-NEXT: [[TMP18:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32> +; CHECK-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[TMP16]] to <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = mul nsw <4 x i32> [[TMP19]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; CHECK-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; CHECK-NEXT: [[TMP28]] = load i16, i16* [[CUR_INDEX]], align 2 -; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP28]] to i32 +; CHECK-NEXT: [[TMP24]] = load i16, i16* [[CUR_INDEX]], align 2 +; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP24]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -3119,127 +2933,99 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 ; UNROLL-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL: vector.memcheck: -; UNROLL-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; UNROLL-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 -; UNROLL-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A]] to i64 -; UNROLL-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[C2]] -; UNROLL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 -; UNROLL-NEXT: [[TMP1:%.*]] = add nuw i64 [[A3]], 2 -; UNROLL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[C2]] -; UNROLL-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 32 -; UNROLL-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; UNROLL-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[TMP1]] -; UNROLL-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP3]], 32 -; UNROLL-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] -; UNROLL-NEXT: br i1 [[CONFLICT_RDX6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 ; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 2 -; UNROLL-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 3 -; UNROLL-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 4 -; UNROLL-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 5 -; UNROLL-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 6 -; UNROLL-NEXT: [[TMP10:%.*]] = or i64 [[INDEX]], 7 -; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]] -; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1 +; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; UNROLL-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 2 +; UNROLL-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 +; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 4 +; UNROLL-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 5 +; UNROLL-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 6 +; UNROLL-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 7 +; UNROLL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDEX]] +; UNROLL-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1 +; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1 +; UNROLL-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1 +; UNROLL-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1 +; UNROLL-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1 ; UNROLL-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1 ; UNROLL-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1 ; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1 -; UNROLL-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1 -; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP8]], i64 1 -; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP9]], i64 1 -; UNROLL-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP10]], i64 1 -; UNROLL-NEXT: [[TMP20:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> , <4 x i32>* [[TMP20]], align 4 -; UNROLL-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 4 -; UNROLL-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> , <4 x i32>* [[TMP22]], align 4 -; UNROLL-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP12]], align 2 -; UNROLL-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP13]], align 2 -; UNROLL-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP14]], align 2 -; UNROLL-NEXT: [[TMP26:%.*]] = load i16, i16* [[TMP15]], align 2 -; UNROLL-NEXT: [[TMP27:%.*]] = insertelement <4 x i16> poison, i16 [[TMP23]], i64 0 -; UNROLL-NEXT: [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i64 1 -; UNROLL-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i64 2 -; UNROLL-NEXT: [[TMP30:%.*]] = insertelement <4 x i16> [[TMP29]], i16 [[TMP26]], i64 3 -; UNROLL-NEXT: [[TMP31:%.*]] = load i16, i16* [[TMP16]], align 2 -; UNROLL-NEXT: [[TMP32:%.*]] = load i16, i16* [[TMP17]], align 2 -; UNROLL-NEXT: [[TMP33:%.*]] = load i16, i16* [[TMP18]], align 2 -; UNROLL-NEXT: [[TMP34:%.*]] = load i16, i16* [[TMP19]], align 2 -; UNROLL-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i64 0 -; UNROLL-NEXT: [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i64 1 -; UNROLL-NEXT: [[TMP37:%.*]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i64 2 -; UNROLL-NEXT: [[TMP38]] = insertelement <4 x i16> [[TMP37]], i16 [[TMP34]], i64 3 -; UNROLL-NEXT: [[TMP39:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> -; UNROLL-NEXT: [[TMP40:%.*]] = shufflevector <4 x i16> [[TMP30]], <4 x i16> [[TMP37]], <4 x i32> -; UNROLL-NEXT: [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32> -; UNROLL-NEXT: [[TMP42:%.*]] = sext <4 x i16> [[TMP40]] to <4 x i32> -; UNROLL-NEXT: [[TMP43:%.*]] = sext <4 x i16> [[TMP30]] to <4 x i32> -; UNROLL-NEXT: [[TMP44:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32> -; UNROLL-NEXT: [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]] -; UNROLL-NEXT: [[TMP46:%.*]] = mul nsw <4 x i32> [[TMP44]], [[TMP42]] -; UNROLL-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; UNROLL-NEXT: [[TMP48:%.*]] = bitcast i32* [[TMP47]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP48]], align 4 -; UNROLL-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, i32* [[TMP47]], i64 4 -; UNROLL-NEXT: [[TMP50:%.*]] = bitcast i32* [[TMP49]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP46]], <4 x i32>* [[TMP50]], align 4 +; UNROLL-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> , <4 x i32>* [[TMP16]], align 4 +; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i64 4 +; UNROLL-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> , <4 x i32>* [[TMP18]], align 4 +; UNROLL-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP8]], align 2 +; UNROLL-NEXT: [[TMP20:%.*]] = load i16, i16* [[TMP9]], align 2 +; UNROLL-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP10]], align 2 +; UNROLL-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP11]], align 2 +; UNROLL-NEXT: [[TMP23:%.*]] = insertelement <4 x i16> poison, i16 [[TMP19]], i64 0 +; UNROLL-NEXT: [[TMP24:%.*]] = insertelement <4 x i16> [[TMP23]], i16 [[TMP20]], i64 1 +; UNROLL-NEXT: [[TMP25:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP21]], i64 2 +; UNROLL-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP25]], i16 [[TMP22]], i64 3 +; UNROLL-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP12]], align 2 +; UNROLL-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP13]], align 2 +; UNROLL-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP14]], align 2 +; UNROLL-NEXT: [[TMP30:%.*]] = load i16, i16* [[TMP15]], align 2 +; UNROLL-NEXT: [[TMP31:%.*]] = insertelement <4 x i16> poison, i16 [[TMP27]], i64 0 +; UNROLL-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP31]], i16 [[TMP28]], i64 1 +; UNROLL-NEXT: [[TMP33:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP29]], i64 2 +; UNROLL-NEXT: [[TMP34]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP30]], i64 3 +; UNROLL-NEXT: [[TMP35:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP25]], <4 x i32> +; UNROLL-NEXT: [[TMP36:%.*]] = shufflevector <4 x i16> [[TMP26]], <4 x i16> [[TMP33]], <4 x i32> +; UNROLL-NEXT: [[TMP37:%.*]] = sext <4 x i16> [[TMP35]] to <4 x i32> +; UNROLL-NEXT: [[TMP38:%.*]] = sext <4 x i16> [[TMP36]] to <4 x i32> +; UNROLL-NEXT: [[TMP39:%.*]] = sext <4 x i16> [[TMP26]] to <4 x i32> +; UNROLL-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP34]] to <4 x i32> +; UNROLL-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]] +; UNROLL-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]] +; UNROLL-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; UNROLL-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP41]], <4 x i32>* [[TMP44]], align 4 +; UNROLL-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, i32* [[TMP43]], i64 4 +; UNROLL-NEXT: [[TMP46:%.*]] = bitcast i32* [[TMP45]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP42]], <4 x i32>* [[TMP46]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; UNROLL-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP52:%.*]], [[FOR_BODY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP48:%.*]], [[FOR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; UNROLL-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; UNROLL-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; UNROLL-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; UNROLL-NEXT: [[TMP52]] = load i16, i16* [[CUR_INDEX]], align 2 -; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP52]] to i32 +; UNROLL-NEXT: [[TMP48]] = load i16, i16* [[CUR_INDEX]], align 2 +; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP48]] to i32 ; UNROLL-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; ; UNROLL-NO-IC-LABEL: @PR34711( ; UNROLL-NO-IC-NEXT: entry: -; UNROLL-NO-IC-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A:%.*]] to i64 -; UNROLL-NO-IC-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 -; UNROLL-NO-IC-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; UNROLL-NO-IC-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 0, i64 0 +; UNROLL-NO-IC-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 ; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-IC: vector.memcheck: -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[C2]] -; UNROLL-NO-IC-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw i64 [[A3]], 2 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[C2]] -; UNROLL-NO-IC-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 32 -; UNROLL-NO-IC-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[TMP1]] -; UNROLL-NO-IC-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP3]], 32 -; UNROLL-NO-IC-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] -; UNROLL-NO-IC-NEXT: br i1 [[CONFLICT_RDX6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -3247,188 +3033,160 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2 -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 4 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 5 -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 6 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 7 -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP8]] +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1 +; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP8]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP9]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP10]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP11]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP23]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP12]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP25]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load i16, i16* [[TMP14]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP15]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP17]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x i16> poison, i16 [[TMP26]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = insertelement <4 x i16> [[TMP30]], i16 [[TMP27]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> [[TMP31]], i16 [[TMP28]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP29]], i32 3 -; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = load i16, i16* [[TMP18]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = load i16, i16* [[TMP19]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = load i16, i16* [[TMP20]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = load i16, i16* [[TMP21]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> poison, i16 [[TMP34]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x i16> [[TMP38]], i16 [[TMP35]], i32 1 -; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP36]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP41]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP37]], i32 3 -; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP33]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i16> [[TMP33]], <4 x i16> [[TMP41]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = sext <4 x i16> [[TMP42]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = sext <4 x i16> [[TMP43]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = sext <4 x i16> [[TMP33]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = sext <4 x i16> [[TMP41]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = mul nsw <4 x i32> [[TMP46]], [[TMP44]] -; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = mul nsw <4 x i32> [[TMP47]], [[TMP45]] -; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP8]] -; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, i32* [[TMP50]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP53:%.*]] = bitcast i32* [[TMP52]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP48]], <4 x i32>* [[TMP53]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP54:%.*]] = getelementptr inbounds i32, i32* [[TMP50]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP55:%.*]] = bitcast i32* [[TMP54]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP49]], <4 x i32>* [[TMP55]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP19]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> , <4 x i32>* [[TMP21]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = load i16, i16* [[TMP10]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = load i16, i16* [[TMP11]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load i16, i16* [[TMP12]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load i16, i16* [[TMP13]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP22]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP23]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i16, i16* [[TMP14]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = load i16, i16* [[TMP15]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = load i16, i16* [[TMP16]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = load i16, i16* [[TMP17]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> poison, i16 [[TMP30]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 1 +; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP37]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = shufflevector <4 x i16> [[TMP29]], <4 x i16> [[TMP37]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = sext <4 x i16> [[TMP29]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = sext <4 x i16> [[TMP37]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP42]], [[TMP40]] +; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]] +; UNROLL-NO-IC-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] +; UNROLL-NO-IC-NEXT: [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] +; UNROLL-NO-IC-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP49]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP51]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP56:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP56]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP41]], i32 3 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP41]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP37]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP37]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP57:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP53:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; UNROLL-NO-IC-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; UNROLL-NO-IC-NEXT: [[TMP57]] = load i16, i16* [[CUR_INDEX]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP57]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP53]] = load i16, i16* [[CUR_INDEX]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP53]] to i32 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; ; UNROLL-NO-VF-LABEL: @PR34711( ; UNROLL-NO-VF-NEXT: entry: -; UNROLL-NO-VF-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A:%.*]] to i64 -; UNROLL-NO-VF-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 -; UNROLL-NO-VF-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; UNROLL-NO-VF-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 0, i64 0 +; UNROLL-NO-VF-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 ; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-VF: vector.memcheck: -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[C2]] -; UNROLL-NO-VF-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add nuw i64 [[A3]], 2 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[C2]] -; UNROLL-NO-VF-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 8 -; UNROLL-NO-VF-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[TMP1]] -; UNROLL-NO-VF-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP3]], 8 -; UNROLL-NO-VF-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] -; UNROLL-NO-VF-NEXT: br i1 [[CONFLICT_RDX6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION7]] -; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION]], i64 1 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION7]], i64 1 -; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP4]], align 4 -; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP5]], align 4 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = load i16, i16* [[TMP6]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP9]] = load i16, i16* [[TMP7]], align 2 -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP8]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sext i16 [[TMP8]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sext i16 [[TMP9]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP12]], [[TMP10]] -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = mul nsw i32 [[TMP13]], [[TMP11]] -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP16]], align 4 -; UNROLL-NO-VF-NEXT: store i32 [[TMP15]], i32* [[TMP17]], align 4 +; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION1]] +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION]], i64 1 +; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION1]], i64 1 +; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP0]], align 4 +; UNROLL-NO-VF-NEXT: store i32 7, i32* [[TMP1]], align 4 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP5]] = load i16, i16* [[TMP3]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[TMP4]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP4]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = sext i16 [[TMP5]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]] +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]] +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]] +; UNROLL-NO-VF-NEXT: store i32 [[TMP10]], i32* [[TMP12]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP11]], i32* [[TMP13]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; UNROLL-NO-VF-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP19]] = load i16, i16* [[CUR_INDEX]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP19]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP15]] = load i16, i16* [[CUR_INDEX]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP15]] to i32 ; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; ; SINK-AFTER-LABEL: @PR34711( ; SINK-AFTER-NEXT: entry: -; SINK-AFTER-NEXT: [[A3:%.*]] = ptrtoint [2 x i16]* [[A:%.*]] to i64 -; SINK-AFTER-NEXT: [[C2:%.*]] = ptrtoint i32* [[C:%.*]] to i64 -; SINK-AFTER-NEXT: [[B1:%.*]] = ptrtoint i32* [[B:%.*]] to i64 -; SINK-AFTER-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 0, i64 0 +; SINK-AFTER-NEXT: [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0 ; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2 ; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; SINK-AFTER: vector.memcheck: -; SINK-AFTER-NEXT: [[TMP0:%.*]] = sub i64 [[B1]], [[C2]] -; SINK-AFTER-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16 -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add nuw i64 [[A3]], 2 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[C2]] -; SINK-AFTER-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP2]], 16 -; SINK-AFTER-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]] -; SINK-AFTER-NEXT: [[TMP3:%.*]] = sub i64 [[B1]], [[TMP1]] -; SINK-AFTER-NEXT: [[DIFF_CHECK5:%.*]] = icmp ult i64 [[TMP3]], 16 -; SINK-AFTER-NEXT: [[CONFLICT_RDX6:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK5]] -; SINK-AFTER-NEXT: br i1 [[CONFLICT_RDX6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -3436,62 +3194,62 @@ define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) { ; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]] ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2 -; SINK-AFTER-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 -; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]] -; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1 -; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1 -; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1 -; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1 -; SINK-AFTER-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0 -; SINK-AFTER-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> , <4 x i32>* [[TMP14]], align 4 -; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i16, i16* [[TMP9]], align 2 -; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP10]], align 2 -; SINK-AFTER-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP11]], align 2 -; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP12]], align 2 -; SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x i16> poison, i16 [[TMP15]], i32 0 -; SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP19]], i16 [[TMP16]], i32 1 -; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP17]], i32 2 -; SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP18]], i32 3 -; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP22]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP24:%.*]] = sext <4 x i16> [[TMP23]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP25:%.*]] = sext <4 x i16> [[TMP22]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP26:%.*]] = mul nsw <4 x i32> [[TMP25]], [[TMP24]] -; SINK-AFTER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] -; SINK-AFTER-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 0 -; SINK-AFTER-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP26]], <4 x i32>* [[TMP29]], align 4 +; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; SINK-AFTER-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1 +; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1 +; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1 +; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1 +; SINK-AFTER-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; SINK-AFTER-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> , <4 x i32>* [[TMP10]], align 4 +; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, i16* [[TMP5]], align 2 +; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, i16* [[TMP6]], align 2 +; SINK-AFTER-NEXT: [[TMP13:%.*]] = load i16, i16* [[TMP7]], align 2 +; SINK-AFTER-NEXT: [[TMP14:%.*]] = load i16, i16* [[TMP8]], align 2 +; SINK-AFTER-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i32 0 +; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 1 +; SINK-AFTER-NEXT: [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 2 +; SINK-AFTER-NEXT: [[TMP18]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 3 +; SINK-AFTER-NEXT: [[TMP19:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP18]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP21:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP22:%.*]] = mul nsw <4 x i32> [[TMP21]], [[TMP20]] +; SINK-AFTER-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0 +; SINK-AFTER-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP25]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP22]], i32 3 -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP22]], i32 2 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP18]], i32 3 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP18]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP31:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1 ; SINK-AFTER-NEXT: store i32 7, i32* [[ARRAYCIDX]], align 4 ; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 -; SINK-AFTER-NEXT: [[TMP31]] = load i16, i16* [[CUR_INDEX]], align 2 -; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP31]] to i32 +; SINK-AFTER-NEXT: [[TMP27]] = load i16, i16* [[CUR_INDEX]], align 2 +; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP27]] to i32 ; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -3528,23 +3286,12 @@ for.end: ; ; -define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { +define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i64 %n) { ; CHECK-LABEL: @sink_after_with_multiple_users( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: vector.memcheck: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]] -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32* -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16* -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 @@ -3552,43 +3299,43 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !30 -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], -; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* -; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4, !alias.scope !33, !noalias !30 +; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3 ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2 -; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 +; CHECK-NEXT: [[TMP11]] = load i16, i16* [[ARRAYIDX2]], align 2 +; CHECK-NEXT: [[CONV3:%.*]] = sext i16 [[TMP11]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -3596,94 +3343,70 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NEXT: entry: ; UNROLL-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL: vector.memcheck: -; UNROLL-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]] -; UNROLL-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; UNROLL-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; UNROLL-NEXT: [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32* -; UNROLL-NEXT: [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]] -; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16* -; UNROLL-NEXT: [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]] -; UNROLL-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -8 ; UNROLL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3 ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] -; UNROLL-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !30 -; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4 -; UNROLL-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* -; UNROLL-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2, !alias.scope !30 -; UNROLL-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NEXT: [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> -; UNROLL-NEXT: [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32> -; UNROLL-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32> -; UNROLL-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP10]], -; UNROLL-NEXT: [[TMP13:%.*]] = add nsw <4 x i32> [[TMP11]], -; UNROLL-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; UNROLL-NEXT: [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32> -; UNROLL-NEXT: [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP14]] -; UNROLL-NEXT: [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP15]] -; UNROLL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]] -; UNROLL-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4, !alias.scope !33, !noalias !30 -; UNROLL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i64 4 -; UNROLL-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>* -; UNROLL-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* [[TMP21]], align 4, !alias.scope !33, !noalias !30 +; UNROLL-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 +; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]] +; UNROLL-NEXT: [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>* +; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2 +; UNROLL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i64 4 +; UNROLL-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>* +; UNROLL-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2 +; UNROLL-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> +; UNROLL-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> +; UNROLL-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], +; UNROLL-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], +; UNROLL-NEXT: [[TMP11:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; UNROLL-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> +; UNROLL-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]] +; UNROLL-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP12]] +; UNROLL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; UNROLL-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* [[TMP16]], align 4 +; UNROLL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 4 +; UNROLL-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>* +; UNROLL-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP18]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; UNROLL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] -; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3 +; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3 ; UNROLL-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: -; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL: for.body: -; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ] +; UNROLL-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[FOR_BODY]] ] ; UNROLL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; UNROLL-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; UNROLL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NEXT: [[TMP23]] = load i16, i16* [[ARRAYIDX2]], align 2 -; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP23]] to i32 +; UNROLL-NEXT: [[TMP20]] = load i16, i16* [[ARRAYIDX2]], align 2 +; UNROLL-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 ; UNROLL-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; UNROLL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; UNROLL-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; ; UNROLL-NO-IC-LABEL: @sink_after_with_multiple_users( ; UNROLL-NO-IC-NEXT: entry: -; UNROLL-NO-IC-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; UNROLL-NO-IC-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-IC: vector.memcheck: -; UNROLL-NO-IC-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]] -; UNROLL-NO-IC-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; UNROLL-NO-IC-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NO-IC-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8* -; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; UNROLL-NO-IC-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* -; UNROLL-NO-IC-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]] -; UNROLL-NO-IC-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]] -; UNROLL-NO-IC-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NO-IC-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -3691,156 +3414,130 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP2]], 1 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2, !alias.scope !30 -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>* -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2, !alias.scope !30 -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>* +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2 +; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = sext <4 x i16> [[TMP10]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = add nsw <4 x i32> [[TMP12]], ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP13]], -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = add nsw <4 x i32> [[TMP14]], -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = mul nsw <4 x i32> [[TMP14]], [[TMP16]] ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP17]] -; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = mul nsw <4 x i32> [[TMP16]], [[TMP18]] +; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0 -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP24]], align 4, !alias.scope !33, !noalias !30 -; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 4 -; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>* -; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP26]], align 4, !alias.scope !33, !noalias !30 +; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP18]], <4 x i32>* [[TMP23]], align 4 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>* +; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP25]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3 -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-IC-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-IC: for.body: -; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; UNROLL-NO-IC-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; UNROLL-NO-IC-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-IC-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-IC-NEXT: [[TMP28]] = load i16, i16* [[ARRAYIDX2]], align 2 -; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP28]] to i32 +; UNROLL-NO-IC-NEXT: [[TMP27]] = load i16, i16* [[ARRAYIDX2]], align 2 +; UNROLL-NO-IC-NEXT: [[CONV3:%.*]] = sext i16 [[TMP27]] to i32 ; UNROLL-NO-IC-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; UNROLL-NO-IC-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-IC-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; ; UNROLL-NO-VF-LABEL: @sink_after_with_multiple_users( ; UNROLL-NO-VF-NEXT: entry: -; UNROLL-NO-VF-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; UNROLL-NO-VF-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; UNROLL-NO-VF: vector.memcheck: -; UNROLL-NO-VF-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]] -; UNROLL-NO-VF-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; UNROLL-NO-VF-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; UNROLL-NO-VF-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8* -; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; UNROLL-NO-VF-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; UNROLL-NO-VF-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* -; UNROLL-NO-VF-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]] -; UNROLL-NO-VF-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]] -; UNROLL-NO-VF-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; UNROLL-NO-VF-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL-NO-VF: vector.ph: ; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-VF: vector.body: ; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; UNROLL-NO-VF-NEXT: [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 -; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1 +; UNROLL-NO-VF-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 +; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDUCTION]], 1 +; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1 +; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]] ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] -; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2, !alias.scope !29 -; UNROLL-NO-VF-NEXT: [[TMP6]] = load i16, i16* [[TMP4]], align 2, !alias.scope !29 -; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = sext i16 [[TMP5]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP5]] = load i16, i16* [[TMP3]], align 2 +; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sext i16 [[TMP4]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = add nsw i32 [[TMP6]], 2 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP7]], 2 -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP8]], 2 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = sext i16 [[TMP4]] to i32 ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = sext i16 [[TMP5]] to i32 -; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sext i16 [[TMP6]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = mul nsw i32 [[TMP8]], [[TMP10]] ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = mul nsw i32 [[TMP9]], [[TMP11]] -; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]] -; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]] -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]] -; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], i32* [[TMP15]], align 4, !alias.scope !32, !noalias !29 -; UNROLL-NO-VF-NEXT: store i32 [[TMP14]], i32* [[TMP16]], align 4, !alias.scope !32, !noalias !29 +; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]] +; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]] +; UNROLL-NO-VF-NEXT: store i32 [[TMP12]], i32* [[TMP14]], align 4 +; UNROLL-NO-VF-NEXT: store i32 [[TMP13]], i32* [[TMP15]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-VF: scalar.ph: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] -; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; UNROLL-NO-VF-NEXT: br label [[FOR_BODY:%.*]] ; UNROLL-NO-VF: for.body: -; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; UNROLL-NO-VF-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; UNROLL-NO-VF-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; UNROLL-NO-VF-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; UNROLL-NO-VF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; UNROLL-NO-VF-NEXT: [[TMP18]] = load i16, i16* [[ARRAYIDX2]], align 2 -; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP18]] to i32 +; UNROLL-NO-VF-NEXT: [[TMP17]] = load i16, i16* [[ARRAYIDX2]], align 2 +; UNROLL-NO-VF-NEXT: [[CONV3:%.*]] = sext i16 [[TMP17]] to i32 ; UNROLL-NO-VF-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; UNROLL-NO-VF-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; UNROLL-NO-VF-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; ; SINK-AFTER-LABEL: @sink_after_with_multiple_users( ; SINK-AFTER-NEXT: entry: -; SINK-AFTER-NEXT: [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8* ; SINK-AFTER-NEXT: [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2 ; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 -; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; SINK-AFTER: vector.memcheck: -; SINK-AFTER-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]] -; SINK-AFTER-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* -; SINK-AFTER-NEXT: [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1 -; SINK-AFTER-NEXT: [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8* -; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; SINK-AFTER-NEXT: [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]] -; SINK-AFTER-NEXT: [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8* -; SINK-AFTER-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]] -; SINK-AFTER-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]] -; SINK-AFTER-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; SINK-AFTER-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SINK-AFTER: vector.ph: ; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] @@ -3849,47 +3546,47 @@ define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) { ; SINK-AFTER: vector.body: ; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ] -; SINK-AFTER-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; SINK-AFTER-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]] -; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0 -; SINK-AFTER-NEXT: [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>* -; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2, !alias.scope !30 -; SINK-AFTER-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> -; SINK-AFTER-NEXT: [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], -; SINK-AFTER-NEXT: [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> -; SINK-AFTER-NEXT: [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]] -; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] -; SINK-AFTER-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0 -; SINK-AFTER-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* -; SINK-AFTER-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4, !alias.scope !33, !noalias !30 +; SINK-AFTER-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; SINK-AFTER-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]] +; SINK-AFTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0 +; SINK-AFTER-NEXT: [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>* +; SINK-AFTER-NEXT: [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2 +; SINK-AFTER-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> +; SINK-AFTER-NEXT: [[TMP6:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], +; SINK-AFTER-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; SINK-AFTER-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP8]] +; SINK-AFTER-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] +; SINK-AFTER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0 +; SINK-AFTER-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* +; SINK-AFTER-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; SINK-AFTER-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: -; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] -; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] ; SINK-AFTER-NEXT: br label [[FOR_BODY:%.*]] ; SINK-AFTER: for.body: -; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ] +; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; SINK-AFTER-NEXT: [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32 ; SINK-AFTER-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 2 ; SINK-AFTER-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; SINK-AFTER-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]] -; SINK-AFTER-NEXT: [[TMP15]] = load i16, i16* [[ARRAYIDX2]], align 2 -; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP15]] to i32 +; SINK-AFTER-NEXT: [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2 +; SINK-AFTER-NEXT: [[CONV3:%.*]] = sext i16 [[TMP14]] to i32 ; SINK-AFTER-NEXT: [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]] ; SINK-AFTER-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] ; SINK-AFTER-NEXT: store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4 ; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -4082,7 +3779,7 @@ define void @sink_dead_inst() { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1]] = add i16 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP3:%.*]] = or i16 [[TMP0]], 1 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = zext i16 [[TMP3]] to i32 @@ -4097,7 +3794,7 @@ define void @sink_dead_inst() { ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15 ; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; CHECK-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -4112,7 +3809,7 @@ define void @sink_dead_inst() { ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP1]] = add i16 [[TMP0]], 8 ; UNROLL-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP3:%.*]] = or i16 [[TMP0]], 5 ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = zext i16 [[TMP3]] to i32 @@ -4127,7 +3824,7 @@ define void @sink_dead_inst() { ; UNROLL-NEXT: [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15 ; UNROLL-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 -; UNROLL-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +; UNROLL-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -4155,7 +3852,7 @@ define void @sink_dead_inst() { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 40 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3 @@ -4177,7 +3874,7 @@ define void @sink_dead_inst() { ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; UNROLL-NO-IC-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 -; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -4202,7 +3899,7 @@ define void @sink_dead_inst() { ; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP2]], 5 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42 -; UNROLL-NO-VF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 42 ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -4220,7 +3917,7 @@ define void @sink_dead_inst() { ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; UNROLL-NO-VF-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 -; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -4242,7 +3939,7 @@ define void @sink_dead_inst() { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40 -; SINK-AFTER-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 43, 40 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3 @@ -4264,7 +3961,7 @@ define void @sink_dead_inst() { ; SINK-AFTER-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 ; SINK-AFTER-NEXT: [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32 ; SINK-AFTER-NEXT: [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5 -; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ; @@ -4347,7 +4044,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; CHECK-NEXT: [[TMP22]] = add <4 x i32> [[VEC_PHI]], [[TMP21]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP22]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) @@ -4358,7 +4055,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; CHECK-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAR]] ; CHECK: bb2: -; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]] ; ; UNROLL-LABEL: @sink_into_replication_region( ; UNROLL-NEXT: bb: @@ -4460,7 +4157,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NEXT: [[TMP45]] = add <4 x i32> [[VEC_PHI1]], [[TMP43]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP46:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP47:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI1]] ; UNROLL-NEXT: [[TMP48:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP44]], <4 x i32> [[VEC_PHI]] @@ -4473,7 +4170,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[VAR]] ; UNROLL: bb2: -; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @sink_into_replication_region( ; UNROLL-NO-IC-NEXT: bb: @@ -4582,7 +4279,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]] ; UNROLL-NO-IC-NEXT: [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -4605,7 +4302,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-IC-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region( ; UNROLL-NO-VF-NEXT: bb: @@ -4650,7 +4347,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI1]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF38:![0-9]+]], !llvm.loop [[LOOP39:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF23:![0-9]+]], !llvm.loop [[LOOP24:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP10]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]] @@ -4670,7 +4367,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; UNROLL-NO-VF-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF40:![0-9]+]], !llvm.loop [[LOOP41:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF25:![0-9]+]], !llvm.loop [[LOOP26:![0-9]+]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region( ; SINK-AFTER-NEXT: bb: @@ -4737,7 +4434,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]] ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39:![0-9]+]], !llvm.loop [[LOOP40:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]]) ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 @@ -4759,7 +4456,7 @@ define i32 @sink_into_replication_region(i32 %y) { ; SINK-AFTER-NEXT: [[VAR7]] = udiv i32 219220132, [[VAR3]] ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41:![0-9]+]], !llvm.loop [[LOOP42:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]] ; bb: br label %bb2 @@ -4874,7 +4571,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP39:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP22]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[TMP40:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP39]]) @@ -4885,7 +4582,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; CHECK-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP40]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[VAR]] ; CHECK: bb2: -; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41]], !llvm.loop [[LOOP44:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]] ; ; UNROLL-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NEXT: bb: @@ -5057,7 +4754,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; UNROLL-NEXT: [[TMP77:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NEXT: br i1 [[TMP77]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP77]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: [[TMP78:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI3]] ; UNROLL-NEXT: [[TMP79:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP44]], <4 x i32> [[VEC_PHI]] @@ -5070,7 +4767,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NEXT: [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP80]], [[MIDDLE_BLOCK]] ] ; UNROLL-NEXT: ret i32 [[VAR]] ; UNROLL: bb2: -; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF41]], !llvm.loop [[LOOP44:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]] ; ; UNROLL-NO-IC-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NO-IC-NEXT: bb: @@ -5242,7 +4939,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]] ; UNROLL-NO-IC-NEXT: [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) @@ -5270,7 +4967,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-IC-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-IC-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; UNROLL-NO-IC-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41]], !llvm.loop [[LOOP44:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]] ; ; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple( ; UNROLL-NO-VF-NEXT: bb: @@ -5329,7 +5026,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI2]] ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF38]], !llvm.loop [[LOOP42:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF23]], !llvm.loop [[LOOP27:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]] ; UNROLL-NO-VF-NEXT: br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]] @@ -5354,7 +5051,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; UNROLL-NO-VF-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; UNROLL-NO-VF-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; UNROLL-NO-VF-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF40]], !llvm.loop [[LOOP43:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF25]], !llvm.loop [[LOOP28:![0-9]+]] ; ; SINK-AFTER-LABEL: @sink_into_replication_region_multiple( ; SINK-AFTER-NEXT: bb: @@ -5452,7 +5149,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF39]], !llvm.loop [[LOOP43:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 @@ -5479,7 +5176,7 @@ define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) { ; SINK-AFTER-NEXT: [[VAR8]] = add nsw i32 [[VAR3]], -1 ; SINK-AFTER-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 ; SINK-AFTER-NEXT: [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2 -; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF41]], !llvm.loop [[LOOP44:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]] ; bb: br label %bb2 @@ -5521,13 +5218,13 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -5549,13 +5246,13 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* [[TMP4]], align 4 ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; UNROLL-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; UNROLL: middle.block: ; UNROLL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: br label [[LOOP:%.*]] ; UNROLL: loop: -; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] +; UNROLL-NEXT: br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; UNROLL: for.end: ; UNROLL-NEXT: ret void ; @@ -5591,7 +5288,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3 @@ -5614,7 +5311,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-IC-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; UNROLL-NO-IC-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]] ; UNROLL-NO-IC-NEXT: store i32 0, i32* [[A_GEP]], align 4 -; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] +; UNROLL-NO-IC-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; UNROLL-NO-IC: for.end: ; UNROLL-NO-IC-NEXT: ret void ; @@ -5641,7 +5338,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-VF-NEXT: store i32 0, i32* [[TMP7]], align 4 ; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; UNROLL-NO-VF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] ; UNROLL-NO-VF: middle.block: ; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16 ; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -5662,7 +5359,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; UNROLL-NO-VF-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; UNROLL-NO-VF-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]] ; UNROLL-NO-VF-NEXT: store i32 0, i32* [[A_GEP]], align 4 -; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP45:![0-9]+]] +; UNROLL-NO-VF-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP30:![0-9]+]] ; UNROLL-NO-VF: for.end: ; UNROLL-NO-VF-NEXT: ret void ; @@ -5688,7 +5385,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], ; SINK-AFTER-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 16, 16 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3 @@ -5711,7 +5408,7 @@ define void @sink_after_dead_inst(i32* %A.ptr) { ; SINK-AFTER-NEXT: [[EXT:%.*]] = zext i1 [[B3]] to i32 ; SINK-AFTER-NEXT: [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]] ; SINK-AFTER-NEXT: store i32 0, i32* [[A_GEP]], align 4 -; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP46:![0-9]+]] +; SINK-AFTER-NEXT: br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; SINK-AFTER: for.end: ; SINK-AFTER-NEXT: ret void ;