diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 9076dd21d218a3..c212129c051a86 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -631,11 +631,19 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost( { ISD::FADD, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ { ISD::FMUL, MVT::v8f64, 1 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::f64, 4 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::v2f64, 4 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::v4f64, 8 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::v8f64, 16 }, // Skylake from http://www.agner.org/ { ISD::FNEG, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ { ISD::FADD, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ { ISD::FSUB, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ { ISD::FMUL, MVT::v16f32, 1 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::f32, 3 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::v4f32, 3 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::v8f32, 5 }, // Skylake from http://www.agner.org/ + { ISD::FDIV, MVT::v16f32, 10 }, // Skylake from http://www.agner.org/ }; if (ST->hasAVX512()) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll index 89059c0681da4a..3ac54ef8332419 100644 --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -581,14 +581,14 @@ define i32 @fdiv(i32 %arg) { ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'fdiv' -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = fdiv float undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F64 = fdiv double undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fdiv <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F32 = fdiv float undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fdiv <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fdiv <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fdiv <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fdiv double undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fdiv <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fdiv <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fdiv <8 x double> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'fdiv' diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll index 0faf4274c5deb7..0387ed4cb3e7b8 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll @@ -18,154 +18,126 @@ define void @vdiv(double* %x, double* %y, double %a, i32 %N) #0 { ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER8:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER17:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; CHECK: vector.memcheck: ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr double, double* [[Y:%.*]], i64 [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP6]], [[X]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER8]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER17]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292 +; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -4 -; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 3 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], 12 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] -; CHECK: vector.ph.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP2]], 9223372036854775804 -; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP6:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT11]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT13]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT15:%.*]] = insertelement <4 x double> poison, double [[A]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT15]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT12]] +; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT14]] +; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT16]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[VECTOR_PH_NEW]] ], [ [[NITER_NSUB_3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP4]] to <4 x double>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP5]], align 8, !tbaa [[TBAA3:![0-9]+]], !alias.scope !7 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double* [[TMP6]] to <4 x double>* +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, <4 x double>* [[TMP7]], align 8, !tbaa [[TBAA3]], !alias.scope !7 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 8 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[TMP8]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, !tbaa [[TBAA3:![0-9]+]], !alias.scope !7 -; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP4]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP10]], <4 x double>* [[TMP12]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 -; CHECK-NEXT: [[INDEX_NEXT:%.*]] = or i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_NEXT]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x double>, <4 x double>* [[TMP14]], align 8, !tbaa [[TBAA3]], !alias.scope !7 -; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_1]], [[TMP5]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_NEXT]] +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x double>, <4 x double>* [[TMP9]], align 8, !tbaa [[TBAA3]], !alias.scope !7 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 12 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast double* [[TMP10]] to <4 x double>* +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x double>, <4 x double>* [[TMP11]], align 8, !tbaa [[TBAA3]], !alias.scope !7 +; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP0]] +; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP1]] +; CHECK-NEXT: [[TMP14:%.*]] = fmul fast <4 x double> [[WIDE_LOAD9]], [[TMP2]] +; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <4 x double> [[WIDE_LOAD10]], [[TMP3]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 -; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = or i64 [[INDEX]], 8 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_NEXT_1]] +; CHECK-NEXT: store <4 x double> [[TMP12]], <4 x double>* [[TMP17]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 4 ; CHECK-NEXT: [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x double>, <4 x double>* [[TMP19]], align 8, !tbaa [[TBAA3]], !alias.scope !7 -; CHECK-NEXT: [[TMP20:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_2]], [[TMP6]] -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_NEXT_1]] -; CHECK-NEXT: [[TMP22:%.*]] = bitcast double* [[TMP21]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP20]], <4 x double>* [[TMP22]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 -; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = or i64 [[INDEX]], 12 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_NEXT_2]] -; CHECK-NEXT: [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x double>, <4 x double>* [[TMP24]], align 8, !tbaa [[TBAA3]], !alias.scope !7 -; CHECK-NEXT: [[TMP25:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_3]], [[TMP7]] -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_NEXT_2]] -; CHECK-NEXT: [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP25]], <4 x double>* [[TMP27]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 -; CHECK-NEXT: [[INDEX_NEXT_3]] = add i64 [[INDEX]], 16 -; CHECK-NEXT: [[NITER_NSUB_3]] = add i64 [[NITER]], -4 -; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NSUB_3]], 0 -; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[MIDDLE_BLOCK_UNR_LCSSA]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: middle.block.unr-lcssa: -; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT_3]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]] -; CHECK: vector.body.epil.preheader: -; CHECK-NEXT: [[TMP28:%.*]] = fdiv fast <4 x double> , [[BROADCAST_SPLAT]] -; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] -; CHECK: vector.body.epil: -; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ], [ [[INDEX_NEXT_EPIL:%.*]], [[VECTOR_BODY_EPIL]] ] -; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[VECTOR_BODY_EPIL_PREHEADER]] ], [ [[EPIL_ITER_SUB:%.*]], [[VECTOR_BODY_EPIL]] ] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDEX_EPIL]] -; CHECK-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* -; CHECK-NEXT: [[WIDE_LOAD_EPIL:%.*]] = load <4 x double>, <4 x double>* [[TMP30]], align 8, !tbaa [[TBAA3]], !alias.scope !7 -; CHECK-NEXT: [[TMP31:%.*]] = fmul fast <4 x double> [[WIDE_LOAD_EPIL]], [[TMP28]] -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDEX_EPIL]] -; CHECK-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* -; CHECK-NEXT: store <4 x double> [[TMP31]], <4 x double>* [[TMP33]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 -; CHECK-NEXT: [[INDEX_NEXT_EPIL]] = add i64 [[INDEX_EPIL]], 4 -; CHECK-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 -; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY_EPIL]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: store <4 x double> [[TMP13]], <4 x double>* [[TMP19]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 8 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP14]], <4 x double>* [[TMP21]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[TMP16]], i64 12 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP22]] to <4 x double>* +; CHECK-NEXT: store <4 x double> [[TMP15]], <4 x double>* [[TMP23]], align 8, !tbaa [[TBAA3]], !alias.scope !10, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER8]] -; CHECK: for.body.preheader8: +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER17]] +; CHECK: for.body.preheader17: ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP34:%.*]] = xor i64 [[INDVARS_IV_PH]], -1 -; CHECK-NEXT: [[TMP35:%.*]] = add nsw i64 [[TMP34]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: [[XTRAITER9:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3 -; CHECK-NEXT: [[LCMP_MOD10_NOT:%.*]] = icmp eq i64 [[XTRAITER9]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD10_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]] +; CHECK-NEXT: [[TMP25:%.*]] = xor i64 [[INDVARS_IV_PH]], -1 +; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP25]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 3 +; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]], label [[FOR_BODY_PROL_PREHEADER:%.*]] ; CHECK: for.body.prol.preheader: -; CHECK-NEXT: [[TMP36:%.*]] = fdiv fast double 1.000000e+00, [[A]] +; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]] ; CHECK: for.body.prol: ; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ] -; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ], [ [[XTRAITER9]], [[FOR_BODY_PROL_PREHEADER]] ] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ], [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ] ; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_PROL]] ; CHECK-NEXT: [[T0_PROL:%.*]] = load double, double* [[ARRAYIDX_PROL]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP37:%.*]] = fmul fast double [[T0_PROL]], [[TMP36]] +; CHECK-NEXT: [[TMP28:%.*]] = fmul fast double [[T0_PROL]], [[TMP27]] ; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_PROL]] -; CHECK-NEXT: store double [[TMP37]], double* [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP28]], double* [[ARRAYIDX2_PROL]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 ; CHECK-NEXT: [[PROL_ITER_SUB]] = add i64 [[PROL_ITER]], -1 ; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label [[FOR_BODY_PROL_LOOPEXIT]], label [[FOR_BODY_PROL]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: for.body.prol.loopexit: -; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER8]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] -; CHECK-NEXT: [[TMP38:%.*]] = icmp ult i64 [[TMP35]], 3 -; CHECK-NEXT: br i1 [[TMP38]], label [[FOR_END]], label [[FOR_BODY_PREHEADER8_NEW:%.*]] -; CHECK: for.body.preheader8.new: -; CHECK-NEXT: [[TMP39:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP40:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP41:%.*]] = fdiv fast double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP42:%.*]] = fdiv fast double 1.000000e+00, [[A]] +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER17]] ], [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[TMP29:%.*]] = icmp ult i64 [[TMP26]], 3 +; CHECK-NEXT: br i1 [[TMP29]], label [[FOR_END]], label [[FOR_BODY_PREHEADER17_NEW:%.*]] +; CHECK: for.body.preheader17.new: +; CHECK-NEXT: [[TMP30:%.*]] = fdiv fast double 1.000000e+00, [[A]] +; CHECK-NEXT: [[TMP31:%.*]] = fdiv fast double 1.000000e+00, [[A]] +; CHECK-NEXT: [[TMP32:%.*]] = fdiv fast double 1.000000e+00, [[A]] +; CHECK-NEXT: [[TMP33:%.*]] = fdiv fast double 1.000000e+00, [[A]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER8_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER17_NEW]] ], [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[T0:%.*]] = load double, double* [[ARRAYIDX]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP43:%.*]] = fmul fast double [[T0]], [[TMP39]] +; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0]], [[TMP30]] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store double [[TMP43]], double* [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP34]], double* [[ARRAYIDX2]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: [[T0_1:%.*]] = load double, double* [[ARRAYIDX_1]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP44:%.*]] = fmul fast double [[T0_1]], [[TMP40]] +; CHECK-NEXT: [[TMP35:%.*]] = fmul fast double [[T0_1]], [[TMP31]] ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT]] -; CHECK-NEXT: store double [[TMP44]], double* [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP35]], double* [[ARRAYIDX2_1]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_1]] ; CHECK-NEXT: [[T0_2:%.*]] = load double, double* [[ARRAYIDX_2]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP45:%.*]] = fmul fast double [[T0_2]], [[TMP41]] +; CHECK-NEXT: [[TMP36:%.*]] = fmul fast double [[T0_2]], [[TMP32]] ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_1]] -; CHECK-NEXT: store double [[TMP45]], double* [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP36]], double* [[ARRAYIDX2_2]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[INDVARS_IV_NEXT_2]] ; CHECK-NEXT: [[T0_3:%.*]] = load double, double* [[ARRAYIDX_3]], align 8, !tbaa [[TBAA3]] -; CHECK-NEXT: [[TMP46:%.*]] = fmul fast double [[T0_3]], [[TMP42]] +; CHECK-NEXT: [[TMP37:%.*]] = fmul fast double [[T0_3]], [[TMP33]] ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[INDVARS_IV_NEXT_2]] -; CHECK-NEXT: store double [[TMP46]], double* [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]] +; CHECK-NEXT: store double [[TMP37]], double* [[ARRAYIDX2_3]], align 8, !tbaa [[TBAA3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV]], 4 ; CHECK-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_3]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_3]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ;