@@ -522,55 +522,111 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
522522; DEFAULT-LABEL: define void @multiple_exit_conditions(
523523; DEFAULT-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2:[0-9]+]] {
524524; DEFAULT-NEXT: [[ENTRY:.*:]]
525- ; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]]
525+ ; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
526+ ; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP0]], 1
527+ ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 257, [[TMP6]]
528+ ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
529+ ; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
530+ ; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
531+ ; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
532+ ; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 257, [[TMP3]]
533+ ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
526534; DEFAULT: [[VECTOR_PH]]:
527- ; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 2048
535+ ; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
536+ ; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
537+ ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]]
538+ ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
528539; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
529540; DEFAULT: [[VECTOR_BODY]]:
530541; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
531542; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
532543; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
533544; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2
534- ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
535- ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
536- ; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
537- ; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double>
538- ; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[NEXT_GEP]], align 8
539- ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
540- ; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
541- ; DEFAULT-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
545+ ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP1]], i64 0
546+ ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
547+ ; DEFAULT-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
548+ ; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> [[TMP8]] to <vscale x 4 x double>
549+ ; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
550+ ; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
551+ ; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP11]]
552+ ; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
553+ ; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
554+ ; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP14]]
555+ ; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
556+ ; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 12
557+ ; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP17]]
558+ ; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[NEXT_GEP]], align 8
559+ ; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP12]], align 8
560+ ; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP15]], align 8
561+ ; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP18]], align 8
562+ ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
563+ ; DEFAULT-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
564+ ; DEFAULT-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
542565; DEFAULT: [[MIDDLE_BLOCK]]:
543- ; DEFAULT-NEXT: br label %[[SCALAR_PH:.*]]
544- ; DEFAULT: [[SCALAR_PH]]:
566+ ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 257, [[N_VEC]]
567+ ; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
568+ ; DEFAULT: [[VEC_EPILOG_ITER_CHECK]]:
569+ ; DEFAULT-NEXT: [[TMP20:%.*]] = mul i64 [[N_VEC]], 8
570+ ; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP20]]
571+ ; DEFAULT-NEXT: [[IND_END11:%.*]] = mul i64 [[N_VEC]], 2
572+ ; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP6]]
573+ ; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF24:![0-9]+]]
574+ ; DEFAULT: [[VEC_EPILOG_PH]]:
575+ ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
576+ ; DEFAULT-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
577+ ; DEFAULT-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 2
578+ ; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 257, [[TMP22]]
579+ ; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 257, [[N_MOD_VF2]]
580+ ; DEFAULT-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC3]], 8
581+ ; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
582+ ; DEFAULT-NEXT: [[TMP25:%.*]] = mul i64 [[N_VEC3]], 2
583+ ; DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
584+ ; DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]:
585+ ; DEFAULT-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
586+ ; DEFAULT-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX4]], 8
587+ ; DEFAULT-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX1]]
588+ ; DEFAULT-NEXT: [[TMP26:%.*]] = load i16, ptr [[SRC]], align 2
589+ ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP26]], i64 0
590+ ; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
591+ ; DEFAULT-NEXT: [[TMP27:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT7]], splat (i16 1)
592+ ; DEFAULT-NEXT: [[TMP28:%.*]] = uitofp <vscale x 2 x i16> [[TMP27]] to <vscale x 2 x double>
593+ ; DEFAULT-NEXT: store <vscale x 2 x double> [[TMP28]], ptr [[NEXT_GEP5]], align 8
594+ ; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX4]], [[TMP22]]
595+ ; DEFAULT-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]]
596+ ; DEFAULT-NEXT: br i1 [[TMP29]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
597+ ; DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]:
598+ ; DEFAULT-NEXT: [[CMP_N9:%.*]] = icmp eq i64 257, [[N_VEC3]]
599+ ; DEFAULT-NEXT: br i1 [[CMP_N9]], [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
600+ ; DEFAULT: [[VEC_EPILOG_SCALAR_PH]]:
545601;
546602; PRED-LABEL: define void @multiple_exit_conditions(
547603; PRED-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2:[0-9]+]] {
548604; PRED-NEXT: [[ENTRY:.*:]]
549605; PRED-NEXT: br label %[[VECTOR_PH:.*]]
550606; PRED: [[VECTOR_PH]]:
551607; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
552- ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
608+ ; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
553609; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
554- ; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
610+ ; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
555611; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
556612; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 257, [[TMP7]]
557613; PRED-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0
558- ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 257)
614+ ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 257)
559615; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
560616; PRED: [[VECTOR_BODY]]:
561617; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
562- ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
618+ ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
563619; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
564620; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
565621; PRED-NEXT: [[TMP12:%.*]] = load i16, ptr [[SRC]], align 2
566- ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP12]], i64 0
567- ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
568- ; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
569- ; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double>
570- ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr align 8 [[NEXT_GEP]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
622+ ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP12]], i64 0
623+ ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
624+ ; PRED-NEXT: [[TMP11:%.*]] = or <vscale x 4 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
625+ ; PRED-NEXT: [[TMP13:%.*]] = uitofp <vscale x 4 x i16> [[TMP11]] to <vscale x 4 x double>
626+ ; PRED-NEXT: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[TMP13]], ptr align 8 [[NEXT_GEP]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
571627; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
572- ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
573- ; PRED-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
628+ ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]])
629+ ; PRED-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
574630; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true
575631; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
576632; PRED: [[MIDDLE_BLOCK]]:
0 commit comments