134 changes: 66 additions & 68 deletions llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; Function Attrs: norecurse nounwind
define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[K:%.*]], 0
Expand Down Expand Up @@ -36,19 +36,17 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[S:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP9]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
; CHECK-NEXT: [[TMP10:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND_NEXT13]]
; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i32> [[TMP10]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_NEXT]]
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP13]], <16 x i8>* [[TMP15]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]]
; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP14]], align 1
; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND_NEXT13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER_NEXT]], 1
Expand All @@ -70,9 +68,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]]
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]]
; CHECK-NEXT: store <16 x i8> [[TMP19]], ptr [[TMP20]], align 1
; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
Expand All @@ -95,8 +92,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0
; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_PROL]]
; CHECK-NEXT: store i8 [[CONV_PROL]], i8* [[ARRAYIDX_PROL]], align 1
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_PROL]]
; CHECK-NEXT: store i8 [[CONV_PROL]], ptr [[ARRAYIDX_PROL]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
; CHECK-NEXT: [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
Expand All @@ -118,64 +115,64 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP28]]
; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]]
; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0
; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 [[CONV_1]], ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP29]]
; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]]
; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0
; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: store i8 [[CONV_2]], ptr [[ARRAYIDX_2]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP30]]
; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]]
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0
; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: store i8 [[CONV_3]], ptr [[ARRAYIDX_3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP31]]
; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]]
; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0
; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: store i8 [[CONV_4]], ptr [[ARRAYIDX_4]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP32]]
; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]]
; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0
; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: store i8 [[CONV_5]], ptr [[ARRAYIDX_5]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP33]]
; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]]
; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0
; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: store i8 [[CONV_6]], ptr [[ARRAYIDX_6]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP34]]
; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]]
; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0
; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: store i8 [[CONV_7]], i8* [[ARRAYIDX_7]], align 1
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: store i8 [[CONV_7]], ptr [[ARRAYIDX_7]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; CHECK-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND_7]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY]]
Expand All @@ -185,9 +182,9 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[K]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[IDXPROM1]]
; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX2]], align 1
; CHECK-NEXT: ret i8* [[S]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[IDXPROM1]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: ret ptr [[S]]
;
entry:
%cmp10 = icmp sgt i32 %k, 0
Expand All @@ -211,13 +208,12 @@ vector.body: ; preds = %vector.body, %vecto
%1 = and <16 x i32> %0, %broadcast.splat
%2 = icmp eq <16 x i32> %1, zeroinitializer
%3 = select <16 x i1> %2, <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
%4 = getelementptr inbounds i8, i8* %s, i64 %index
%5 = bitcast i8* %4 to <16 x i8>*
store <16 x i8> %3, <16 x i8>* %5, align 1
%4 = getelementptr inbounds i8, ptr %s, i64 %index
store <16 x i8> %3, ptr %4, align 1
%index.next = add i64 %index, 16
%vec.ind.next13 = add <16 x i32> %vec.ind12, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%6 = icmp eq i64 %index.next, %n.vec
br i1 %6, label %middle.block, label %vector.body
%5 = icmp eq i64 %index.next, %n.vec
br i1 %5, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
Expand All @@ -229,21 +225,21 @@ for.body.preheader: ; preds = %middle.block, %for.

for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
%7 = trunc i64 %indvars.iv to i32
%shl = shl i32 1, %7
%6 = trunc i64 %indvars.iv to i32
%shl = shl i32 1, %6
%and = and i32 %shl, %x
%tobool = icmp eq i32 %and, 0
%conv = select i1 %tobool, i8 48, i8 49
%arrayidx = getelementptr inbounds i8, i8* %s, i64 %indvars.iv
store i8 %conv, i8* %arrayidx, align 1
%arrayidx = getelementptr inbounds i8, ptr %s, i64 %indvars.iv
store i8 %conv, ptr %arrayidx, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %middle.block, %entry
%idxprom1 = sext i32 %k to i64
%arrayidx2 = getelementptr inbounds i8, i8* %s, i64 %idxprom1
store i8 0, i8* %arrayidx2, align 1
ret i8* %s
%arrayidx2 = getelementptr inbounds i8, ptr %s, i64 %idxprom1
store i8 0, ptr %arrayidx2, align 1
ret ptr %s
}

Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; Function Attrs: norecurse nounwind
define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_addr #0 {
; CHECK-LABEL: @f(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[K:%.*]], 0
Expand Down Expand Up @@ -36,19 +36,17 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[S:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP7]], <16 x i8>* [[TMP9]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]]
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1
; CHECK-NEXT: [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1
; CHECK-NEXT: [[TMP10:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND_NEXT13]]
; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i32> [[TMP10]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq <16 x i32> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_NEXT]]
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP13]], <16 x i8>* [[TMP15]], align 1
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]]
; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP14]], align 1
; CHECK-NEXT: [[INDEX_NEXT_1]] = add i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND_NEXT13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER_NEXT]], 1
Expand All @@ -70,9 +68,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]]
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]]
; CHECK-NEXT: store <16 x i8> [[TMP19]], ptr [[TMP20]], align 1
; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
Expand All @@ -95,8 +92,8 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0
; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_PROL]]
; CHECK-NEXT: store i8 [[CONV_PROL]], i8* [[ARRAYIDX_PROL]], align 1
; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_PROL]]
; CHECK-NEXT: store i8 [[CONV_PROL]], ptr [[ARRAYIDX_PROL]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1
; CHECK-NEXT: [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
Expand All @@ -118,64 +115,64 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP28]]
; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]]
; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0
; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 [[CONV_1]], ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP29]]
; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]]
; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0
; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: store i8 [[CONV_2]], ptr [[ARRAYIDX_2]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP30]]
; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]]
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0
; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: store i8 [[CONV_3]], ptr [[ARRAYIDX_3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP31]]
; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]]
; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0
; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: store i8 [[CONV_4]], ptr [[ARRAYIDX_4]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP32]]
; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]]
; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0
; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: store i8 [[CONV_5]], ptr [[ARRAYIDX_5]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP33]]
; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]]
; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0
; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: store i8 [[CONV_6]], ptr [[ARRAYIDX_6]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP34]]
; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]]
; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0
; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: store i8 [[CONV_7]], i8* [[ARRAYIDX_7]], align 1
; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDVARS_IV_NEXT_6]]
; CHECK-NEXT: store i8 [[CONV_7]], ptr [[ARRAYIDX_7]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV_NEXT_6]], 1
; CHECK-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND_7]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY]]
Expand All @@ -185,9 +182,9 @@ define i8* @f(i8* returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[IDXPROM1:%.*]] = sext i32 [[K]] to i64
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[IDXPROM1]]
; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX2]], align 1
; CHECK-NEXT: ret i8* [[S]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[IDXPROM1]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: ret ptr [[S]]
;
entry:
%cmp10 = icmp sgt i32 %k, 0
Expand All @@ -211,13 +208,12 @@ vector.body: ; preds = %vector.body, %vecto
%1 = and <16 x i32> %0, %broadcast.splat
%2 = icmp eq <16 x i32> %1, zeroinitializer
%3 = select <16 x i1> %2, <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
%4 = getelementptr inbounds i8, i8* %s, i64 %index
%5 = bitcast i8* %4 to <16 x i8>*
store <16 x i8> %3, <16 x i8>* %5, align 1
%4 = getelementptr inbounds i8, ptr %s, i64 %index
store <16 x i8> %3, ptr %4, align 1
%index.next = add i64 %index, 16
%vec.ind.next13 = add <16 x i32> %vec.ind12, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
%6 = icmp eq i64 %index.next, %n.vec
br i1 %6, label %middle.block, label %vector.body
%5 = icmp eq i64 %index.next, %n.vec
br i1 %5, label %middle.block, label %vector.body

middle.block: ; preds = %vector.body
%cmp.n = icmp eq i64 %n.vec, %wide.trip.count
Expand All @@ -229,21 +225,21 @@ for.body.preheader: ; preds = %middle.block, %for.

for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
%7 = trunc i64 %indvars.iv to i32
%shl = shl i32 1, %7
%6 = trunc i64 %indvars.iv to i32
%shl = shl i32 1, %6
%and = and i32 %shl, %x
%tobool = icmp eq i32 %and, 0
%conv = select i1 %tobool, i8 48, i8 49
%arrayidx = getelementptr inbounds i8, i8* %s, i64 %indvars.iv
store i8 %conv, i8* %arrayidx, align 1
%arrayidx = getelementptr inbounds i8, ptr %s, i64 %indvars.iv
store i8 %conv, ptr %arrayidx, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %middle.block, %entry
%idxprom1 = sext i32 %k to i64
%arrayidx2 = getelementptr inbounds i8, i8* %s, i64 %idxprom1
store i8 0, i8* %arrayidx2, align 1
ret i8* %s
%arrayidx2 = getelementptr inbounds i8, ptr %s, i64 %idxprom1
store i8 0, ptr %arrayidx2, align 1
ret ptr %s
}

10 changes: 5 additions & 5 deletions llvm/test/Transforms/LoopUnroll/RISCV/invalid-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-unknown"

define void @invalid(<vscale x 1 x i8>* %p) nounwind ssp {
define void @invalid(ptr %p) nounwind ssp {
; CHECK-LABEL: @invalid(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[A:%.*]] = load <vscale x 1 x i8>, <vscale x 1 x i8>* [[P:%.*]], align 1
; CHECK-NEXT: [[A:%.*]] = load <vscale x 1 x i8>, ptr [[P:%.*]], align 1
; CHECK-NEXT: [[B:%.*]] = add <vscale x 1 x i8> [[A]], [[A]]
; CHECK-NEXT: store <vscale x 1 x i8> [[B]], <vscale x 1 x i8>* [[P]], align 1
; CHECK-NEXT: store <vscale x 1 x i8> [[B]], ptr [[P]], align 1
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 10
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
Expand All @@ -29,9 +29,9 @@ entry:

for.body: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%a = load <vscale x 1 x i8>, <vscale x 1 x i8>* %p
%a = load <vscale x 1 x i8>, ptr %p
%b = add <vscale x 1 x i8> %a, %a
store <vscale x 1 x i8> %b, <vscale x 1 x i8>* %p
store <vscale x 1 x i8> %b, ptr %p
%inc = add nsw i32 %i.0, 1
%cmp = icmp slt i32 %i.0, 10
br i1 %cmp, label %for.body, label %for.end
Expand Down
172 changes: 86 additions & 86 deletions llvm/test/Transforms/LoopUnroll/RISCV/unroll.ll

Large diffs are not rendered by default.

246 changes: 123 additions & 123 deletions llvm/test/Transforms/LoopUnroll/WebAssembly/basic-unrolling.ll

Large diffs are not rendered by default.

56 changes: 25 additions & 31 deletions llvm/test/Transforms/LoopUnroll/X86/partial.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,31 +3,27 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @foo(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
define void @foo(ptr noalias nocapture readnone %ip, double %alpha, ptr noalias nocapture %a, ptr noalias nocapture readonly %b) #0 {
entry:
br label %vector.body

vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%0 = getelementptr inbounds double, double* %b, i64 %index
%1 = bitcast double* %0 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %1, align 8
%0 = getelementptr inbounds double, ptr %b, i64 %index
%wide.load = load <2 x double>, ptr %0, align 8
%.sum9 = or i64 %index, 2
%2 = getelementptr double, double* %b, i64 %.sum9
%3 = bitcast double* %2 to <2 x double>*
%wide.load8 = load <2 x double>, <2 x double>* %3, align 8
%4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
%5 = fadd <2 x double> %wide.load8, <double 1.000000e+00, double 1.000000e+00>
%6 = getelementptr inbounds double, double* %a, i64 %index
%7 = bitcast double* %6 to <2 x double>*
store <2 x double> %4, <2 x double>* %7, align 8
%1 = getelementptr double, ptr %b, i64 %.sum9
%wide.load8 = load <2 x double>, ptr %1, align 8
%2 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
%3 = fadd <2 x double> %wide.load8, <double 1.000000e+00, double 1.000000e+00>
%4 = getelementptr inbounds double, ptr %a, i64 %index
store <2 x double> %2, ptr %4, align 8
%.sum10 = or i64 %index, 2
%8 = getelementptr double, double* %a, i64 %.sum10
%9 = bitcast double* %8 to <2 x double>*
store <2 x double> %5, <2 x double>* %9, align 8
%5 = getelementptr double, ptr %a, i64 %.sum10
store <2 x double> %3, ptr %5, align 8
%index.next = add i64 %index, 4
%10 = icmp eq i64 %index.next, 1600
br i1 %10, label %for.end, label %vector.body
%6 = icmp eq i64 %index.next, 1600
br i1 %6, label %for.end, label %vector.body

; FIXME: We should probably unroll this loop by a factor of 2, but the cost
; model needs to be fixed to account for instructions likely to be folded
Expand All @@ -39,20 +35,18 @@ for.end: ; preds = %vector.body
ret void
}

define void @bar(i32* noalias nocapture readnone %ip, double %alpha, double* noalias nocapture %a, double* noalias nocapture readonly %b) #0 {
define void @bar(ptr noalias nocapture readnone %ip, double %alpha, ptr noalias nocapture %a, ptr noalias nocapture readonly %b) #0 {
entry:
br label %vector.body

vector.body: ; preds = %vector.body, %entry
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%v0 = getelementptr inbounds double, double* %b, i64 %index
%v1 = bitcast double* %v0 to <2 x double>*
%wide.load = load <2 x double>, <2 x double>* %v1, align 8
%v0 = getelementptr inbounds double, ptr %b, i64 %index
%wide.load = load <2 x double>, ptr %v0, align 8
%v4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
%v5 = fmul <2 x double> %v4, <double 8.000000e+00, double 8.000000e+00>
%v6 = getelementptr inbounds double, double* %a, i64 %index
%v7 = bitcast double* %v6 to <2 x double>*
store <2 x double> %v5, <2 x double>* %v7, align 8
%v6 = getelementptr inbounds double, ptr %a, i64 %index
store <2 x double> %v5, ptr %v6, align 8
%index.next = add i64 %index, 2
%v10 = icmp eq i64 %index.next, 1600
br i1 %v10, label %for.end, label %vector.body
Expand All @@ -76,28 +70,28 @@ for.end: ; preds = %vector.body
ret void
}

define zeroext i16 @test1(i16* nocapture readonly %arr, i32 %n) #0 {
define zeroext i16 @test1(ptr nocapture readonly %arr, i32 %n) #0 {
entry:
%cmp25 = icmp eq i32 %n, 0
br i1 %cmp25, label %for.end, label %for.body

for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
%0 = load i16, i16* %arrayidx, align 2
%arrayidx = getelementptr inbounds i16, ptr %arr, i64 %indvars.iv
%0 = load i16, ptr %arrayidx, align 2
%mul = shl i16 %0, 1
%add = add i16 %mul, %reduction.026
%sext = mul i64 %indvars.iv, 12884901888
%idxprom3 = ashr exact i64 %sext, 32
%arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
%1 = load i16, i16* %arrayidx4, align 2
%arrayidx4 = getelementptr inbounds i16, ptr %arr, i64 %idxprom3
%1 = load i16, ptr %arrayidx4, align 2
%mul2 = shl i16 %1, 1
%add7 = add i16 %add, %mul2
%sext28 = mul i64 %indvars.iv, 21474836480
%idxprom10 = ashr exact i64 %sext28, 32
%arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
%2 = load i16, i16* %arrayidx11, align 2
%arrayidx11 = getelementptr inbounds i16, ptr %arr, i64 %idxprom10
%2 = load i16, ptr %arrayidx11, align 2
%mul3 = shl i16 %2, 1
%add14 = add i16 %add7, %mul3
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
Expand Down
76 changes: 38 additions & 38 deletions llvm/test/Transforms/LoopUnroll/X86/store_cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK: Loop Size = 27
; CHECK-NOT: UNROLLING loop %loop.2.header

define void @foo(i32 * %out) {
define void @foo(ptr %out) {
entry:
%0 = alloca [1024 x i32]
%x0 = alloca [1024 x i32]
Expand All @@ -27,26 +27,26 @@ loop.header:
br label %loop.body

loop.body:
%ptr = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter
store i32 %counter, i32* %ptr
%ptr = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter
store i32 %counter, ptr %ptr
%val = add i32 %counter, 5
%xptr = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter
store i32 %val, i32* %xptr
%xptr = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter
store i32 %val, ptr %xptr
%val1 = add i32 %counter, 6
%xptr1 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter
store i32 %val1, i32* %xptr1
%xptr1 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter
store i32 %val1, ptr %xptr1
%val2 = add i32 %counter, 7
%xptr2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter
store i32 %val2, i32* %xptr2
%xptr2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter
store i32 %val2, ptr %xptr2
%val3 = add i32 %counter, 8
%xptr3 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter
store i32 %val3, i32* %xptr3
%xptr3 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter
store i32 %val3, ptr %xptr3
%val4 = add i32 %counter, 9
%xptr4 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter
store i32 %val4, i32* %xptr4
%xptr4 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter
store i32 %val4, ptr %xptr4
%val5 = add i32 %counter, 10
%xptr5 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter
store i32 %val5, i32* %xptr5
%xptr5 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter
store i32 %val5, ptr %xptr5
br label %loop.inc

loop.inc:
Expand All @@ -55,9 +55,9 @@ loop.inc:
br i1 %1, label %exit.0, label %loop.header

exit.0:
%2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 5
%3 = load i32, i32* %2
store i32 %3, i32 * %out
%2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 5
%3 = load i32, ptr %2
store i32 %3, ptr %out
br label %loop.2.header


Expand All @@ -66,28 +66,28 @@ loop.2.header:
br label %loop.2.body

loop.2.body:
%ptr.2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter.2
store i32 %counter.2, i32* %ptr.2
%ptr.2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 %counter.2
store i32 %counter.2, ptr %ptr.2
%val.2 = add i32 %counter.2, 5
%xptr.2 = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter.2
store i32 %val.2, i32* %xptr.2
%xptr.2 = getelementptr [1024 x i32], ptr %x0, i32 0, i32 %counter.2
store i32 %val.2, ptr %xptr.2
%val1.2 = add i32 %counter.2, 6
%xptr1.2 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter.2
store i32 %val1, i32* %xptr1.2
%xptr1.2 = getelementptr [1024 x i32], ptr %x01, i32 0, i32 %counter.2
store i32 %val1, ptr %xptr1.2
%val2.2 = add i32 %counter.2, 7
%xptr2.2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter.2
store i32 %val2, i32* %xptr2.2
%xptr2.2 = getelementptr [1024 x i32], ptr %x02, i32 0, i32 %counter.2
store i32 %val2, ptr %xptr2.2
%val3.2 = add i32 %counter.2, 8
%xptr3.2 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter.2
store i32 %val3.2, i32* %xptr3.2
%xptr3.2 = getelementptr [1024 x i32], ptr %x03, i32 0, i32 %counter.2
store i32 %val3.2, ptr %xptr3.2
%val4.2 = add i32 %counter.2, 9
%xptr4.2 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter.2
store i32 %val4.2, i32* %xptr4.2
%xptr4.2 = getelementptr [1024 x i32], ptr %x04, i32 0, i32 %counter.2
store i32 %val4.2, ptr %xptr4.2
%val5.2 = add i32 %counter.2, 10
%xptr5.2 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter.2
store i32 %val5.2, i32* %xptr5.2
%xptr6.2 = getelementptr [1024 x i32], [1024 x i32]* %x06, i32 0, i32 %counter.2
store i32 %val5.2, i32* %xptr6.2
%xptr5.2 = getelementptr [1024 x i32], ptr %x05, i32 0, i32 %counter.2
store i32 %val5.2, ptr %xptr5.2
%xptr6.2 = getelementptr [1024 x i32], ptr %x06, i32 0, i32 %counter.2
store i32 %val5.2, ptr %xptr6.2
br label %loop.2.inc

loop.2.inc:
Expand All @@ -96,9 +96,9 @@ loop.2.inc:
br i1 %4, label %exit.2, label %loop.2.header

exit.2:
%x2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 6
%x3 = load i32, i32* %x2
%out2 = getelementptr i32, i32 * %out, i32 1
store i32 %3, i32 * %out2
%x2 = getelementptr [1024 x i32], ptr %0, i32 0, i32 6
%x3 = load i32, ptr %x2
%out2 = getelementptr i32, ptr %out, i32 1
store i32 %3, ptr %out2
ret void
}
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopUnroll/basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
; This should not unroll since the address of the loop header is taken.

; CHECK-LABEL: @test1(
; CHECK: store i8* blockaddress(@test1, %l1), i8** %P
; CHECK: store ptr blockaddress(@test1, %l1), ptr %P
; CHECK: l1:
; CHECK-NEXT: phi i32
; rdar://8287027
define i32 @test1(i8** %P) nounwind ssp {
define i32 @test1(ptr %P) nounwind ssp {
entry:
store i8* blockaddress(@test1, %l1), i8** %P
store ptr blockaddress(@test1, %l1), ptr %P
br label %l1

l1: ; preds = %l1, %entry
Expand All @@ -27,7 +27,7 @@ l2: ; preds = %l1
; This should not unroll since the call is 'noduplicate'.

; CHECK-LABEL: @test2(
define i32 @test2(i8** %P) nounwind ssp {
define i32 @test2(ptr %P) nounwind ssp {
entry:
br label %l1

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,62 +15,62 @@
; Function Attrs: nounwind
declare void @llvm.assume(i1) #1

define i32 @foo(i32* %a) {
define i32 @foo(ptr %a) {
; ANALYZE-FULL-LABEL: @foo(
; ANALYZE-FULL-NEXT: entry:
; ANALYZE-FULL-NEXT: br label [[FOR_BODY:%.*]]
; ANALYZE-FULL: for.body:
; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE:%.*]], label [[FOR_NEXT:%.*]]
; ANALYZE-FULL: do_store:
; ANALYZE-FULL-NEXT: store i32 0, i32* [[A:%.*]], align 4
; ANALYZE-FULL-NEXT: store i32 0, ptr [[A:%.*]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT]]
; ANALYZE-FULL: for.next:
; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE_1:%.*]], label [[FOR_NEXT_1:%.*]]
; ANALYZE-FULL: do_store.1:
; ANALYZE-FULL-NEXT: [[GEP_1:%.*]] = getelementptr i32, i32* [[A]], i32 1
; ANALYZE-FULL-NEXT: store i32 1, i32* [[GEP_1]], align 4
; ANALYZE-FULL-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[A]], i32 1
; ANALYZE-FULL-NEXT: store i32 1, ptr [[GEP_1]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_1]]
; ANALYZE-FULL: for.next.1:
; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE_2:%.*]], label [[FOR_NEXT_2:%.*]]
; ANALYZE-FULL: do_store.2:
; ANALYZE-FULL-NEXT: [[GEP_2:%.*]] = getelementptr i32, i32* [[A]], i32 2
; ANALYZE-FULL-NEXT: store i32 2, i32* [[GEP_2]], align 4
; ANALYZE-FULL-NEXT: [[GEP_2:%.*]] = getelementptr i32, ptr [[A]], i32 2
; ANALYZE-FULL-NEXT: store i32 2, ptr [[GEP_2]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_2]]
; ANALYZE-FULL: for.next.2:
; ANALYZE-FULL-NEXT: br i1 true, label [[DO_STORE_3:%.*]], label [[FOR_NEXT_3:%.*]]
; ANALYZE-FULL: do_store.3:
; ANALYZE-FULL-NEXT: [[GEP_3:%.*]] = getelementptr i32, i32* [[A]], i32 3
; ANALYZE-FULL-NEXT: store i32 3, i32* [[GEP_3]], align 4
; ANALYZE-FULL-NEXT: [[GEP_3:%.*]] = getelementptr i32, ptr [[A]], i32 3
; ANALYZE-FULL-NEXT: store i32 3, ptr [[GEP_3]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_3]]
; ANALYZE-FULL: for.next.3:
; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_4:%.*]], label [[FOR_NEXT_4:%.*]]
; ANALYZE-FULL: do_store.4:
; ANALYZE-FULL-NEXT: [[GEP_4:%.*]] = getelementptr i32, i32* [[A]], i32 4
; ANALYZE-FULL-NEXT: store i32 4, i32* [[GEP_4]], align 4
; ANALYZE-FULL-NEXT: [[GEP_4:%.*]] = getelementptr i32, ptr [[A]], i32 4
; ANALYZE-FULL-NEXT: store i32 4, ptr [[GEP_4]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_4]]
; ANALYZE-FULL: for.next.4:
; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_5:%.*]], label [[FOR_NEXT_5:%.*]]
; ANALYZE-FULL: do_store.5:
; ANALYZE-FULL-NEXT: [[GEP_5:%.*]] = getelementptr i32, i32* [[A]], i32 5
; ANALYZE-FULL-NEXT: store i32 5, i32* [[GEP_5]], align 4
; ANALYZE-FULL-NEXT: [[GEP_5:%.*]] = getelementptr i32, ptr [[A]], i32 5
; ANALYZE-FULL-NEXT: store i32 5, ptr [[GEP_5]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_5]]
; ANALYZE-FULL: for.next.5:
; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_6:%.*]], label [[FOR_NEXT_6:%.*]]
; ANALYZE-FULL: do_store.6:
; ANALYZE-FULL-NEXT: [[GEP_6:%.*]] = getelementptr i32, i32* [[A]], i32 6
; ANALYZE-FULL-NEXT: store i32 6, i32* [[GEP_6]], align 4
; ANALYZE-FULL-NEXT: [[GEP_6:%.*]] = getelementptr i32, ptr [[A]], i32 6
; ANALYZE-FULL-NEXT: store i32 6, ptr [[GEP_6]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_6]]
; ANALYZE-FULL: for.next.6:
; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_7:%.*]], label [[FOR_NEXT_7:%.*]]
; ANALYZE-FULL: do_store.7:
; ANALYZE-FULL-NEXT: [[GEP_7:%.*]] = getelementptr i32, i32* [[A]], i32 7
; ANALYZE-FULL-NEXT: store i32 7, i32* [[GEP_7]], align 4
; ANALYZE-FULL-NEXT: [[GEP_7:%.*]] = getelementptr i32, ptr [[A]], i32 7
; ANALYZE-FULL-NEXT: store i32 7, ptr [[GEP_7]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_7]]
; ANALYZE-FULL: for.next.7:
; ANALYZE-FULL-NEXT: br i1 false, label [[DO_STORE_8:%.*]], label [[FOR_NEXT_8:%.*]]
; ANALYZE-FULL: do_store.8:
; ANALYZE-FULL-NEXT: [[GEP_8:%.*]] = getelementptr i32, i32* [[A]], i32 8
; ANALYZE-FULL-NEXT: store i32 8, i32* [[GEP_8]], align 4
; ANALYZE-FULL-NEXT: [[GEP_8:%.*]] = getelementptr i32, ptr [[A]], i32 8
; ANALYZE-FULL-NEXT: store i32 8, ptr [[GEP_8]], align 4
; ANALYZE-FULL-NEXT: br label [[FOR_NEXT_8]]
; ANALYZE-FULL: for.next.8:
; ANALYZE-FULL-NEXT: ret i32 9
Expand All @@ -86,8 +86,8 @@ define i32 @foo(i32* %a) {
; DONT-ANALYZE-FULL-NEXT: [[CMP2:%.*]] = icmp ule i32 [[INDVAR]], 3
; DONT-ANALYZE-FULL-NEXT: br i1 [[CMP2]], label [[DO_STORE:%.*]], label [[FOR_NEXT]]
; DONT-ANALYZE-FULL: do_store:
; DONT-ANALYZE-FULL-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i32 [[INDVAR]]
; DONT-ANALYZE-FULL-NEXT: store i32 [[INDVAR]], i32* [[GEP]], align 4
; DONT-ANALYZE-FULL-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[INDVAR]]
; DONT-ANALYZE-FULL-NEXT: store i32 [[INDVAR]], ptr [[GEP]], align 4
; DONT-ANALYZE-FULL-NEXT: br label [[FOR_NEXT]]
; DONT-ANALYZE-FULL: for.next:
; DONT-ANALYZE-FULL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INDVAR_NEXT]], 9
Expand All @@ -107,8 +107,8 @@ for.body:
br i1 %cmp2, label %do_store, label %for.next

do_store:
%gep = getelementptr i32, i32* %a, i32 %indvar
store i32 %indvar, i32* %gep
%gep = getelementptr i32, ptr %a, i32 %indvar
store i32 %indvar, ptr %gep
br label %for.next

for.next:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopUnroll/debug-info.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ for.body: ; preds = %entry, %for.body
; CHECK: call void @llvm.dbg.value(metadata i32 16, metadata !12, metadata !DIExpression()), !dbg !15
; CHECK: call void @llvm.dbg.value(metadata i32 64, metadata !12, metadata !DIExpression()), !dbg !15

%call = tail call i32 (i32, ...) bitcast (i32 (...)* @bar to i32 (i32, ...)*)(i32 %shr) #3, !dbg !20
%call = tail call i32 (i32, ...) @bar(i32 %shr) #3, !dbg !20
%shl = shl i32 %i.04, 2, !dbg !21
tail call void @llvm.dbg.value(metadata i32 %shl, metadata !12, metadata !DIExpression()), !dbg !15
%cmp = icmp slt i32 %shl, 33, !dbg !22
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: @forced(
; CHECK: load
; CHECK: load
define void @forced(i32* nocapture %a) {
define void @forced(ptr nocapture %a) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
store i32 %inc, ptr %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopUnroll/disable_nonforced.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: @disable_nonforced(
; CHECK: load
; CHECK-NOT: load
define void @disable_nonforced(i32* nocapture %a) {
define void @disable_nonforced(ptr nocapture %a) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
store i32 %inc, ptr %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopUnroll/disable_nonforced_count.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: store
; CHECK: store
; CHECK-NOT: store
define void @disable_nonforced_count(i32* nocapture %a) {
define void @disable_nonforced_count(ptr nocapture %a) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
store i32 %inc, ptr %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopUnroll/disable_nonforced_enable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: store
; CHECK: store
; CHECK-NOT: store
define void @disable_nonforced_enable(i32* nocapture %a) {
define void @disable_nonforced_enable(ptr nocapture %a) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
store i32 %inc, ptr %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 64
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/LoopUnroll/disable_nonforced_full.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; CHECK: store
; CHECK: store
; CHECK-NOT: store
define void @disable_nonforced_full(i32* nocapture %a) {
define void @disable_nonforced_full(ptr nocapture %a) {
entry:
br label %for.body

for.body:
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
store i32 %inc, ptr %arrayidx, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 4
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopUnroll/ephemeral.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
; CHECK: for.body:
; CHECK-NOT: for.end:

define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
define i32 @test1(ptr nocapture %a) nounwind uwtable readonly {
entry:
br label %for.body

for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4

; This loop will be completely unrolled, even with these extra instructions,
; but only because they're ephemeral (and, thus, free).
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/LoopUnroll/epilog_const_phi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
; CHECK: for.body.epil

; Function Attrs: norecurse nounwind uwtable
define void @const_phi_val(i32 %i0, i32* nocapture %a) {
define void @const_phi_val(i32 %i0, ptr nocapture %a) {
entry:
%cmp6 = icmp slt i32 %i0, 1000
br i1 %cmp6, label %for.body.preheader, label %for.end
Expand All @@ -20,8 +20,8 @@ for.body.preheader: ; preds = %entry
for.body: ; preds = %for.body, %for.body.preheader
%indvars.iv = phi i64 [ %tmp, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%s.08 = phi i32 [ 0, %for.body.preheader ], [ %xor, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
store i32 %s.08, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
store i32 %s.08, ptr %arrayidx, align 4
%xor = xor i32 %s.08, 1
%indvars.iv.next = add nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1000
Expand All @@ -41,7 +41,7 @@ for.end: ; preds = %for.end.loopexit, %
; CHECK: for.body.prol

; Function Attrs: norecurse nounwind uwtable
define void @var_phi_val(i32 %i0, i32* nocapture %a) {
define void @var_phi_val(i32 %i0, ptr nocapture %a) {
entry:
%cmp6 = icmp slt i32 %i0, 1000
br i1 %cmp6, label %for.body.preheader, label %for.end
Expand All @@ -52,7 +52,7 @@ for.body.preheader: ; preds = %entry

for.body: ; preds = %for.body, %for.body.preheader
%indvars.iv = phi i64 [ %tmp, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%indvars.iv.next = add nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1000
br i1 %exitcond, label %for.end.loopexit, label %for.body
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopUnroll/followup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
;
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
define i32 @test(ptr nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %for.end, label %for.body

for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
%0 = load i32, ptr %arrayidx, align 4
%add = add nsw i32 %0, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
Expand Down
40 changes: 18 additions & 22 deletions llvm/test/Transforms/LoopUnroll/full-unroll-crashers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ entry:
for.body:
%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%idx = zext i32 undef to i64
%add.ptr = getelementptr inbounds i64, i64* null, i64 %idx
%add.ptr = getelementptr inbounds i64, ptr null, i64 %idx
%inc = add nuw nsw i64 %phi, 1
%cmp = icmp ult i64 %inc, 999
br i1 %cmp, label %for.body, label %for.exit
Expand All @@ -27,7 +27,7 @@ entry:

for.body:
%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%x = getelementptr i32, <4 x i32*> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%x = getelementptr i32, <4 x ptr> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%inc = add nuw nsw i64 %phi, 1
%cmp = icmp ult i64 %inc, 999
br i1 %cmp, label %for.body, label %for.exit
Expand All @@ -42,8 +42,8 @@ entry:

for.body: ; preds = %for.inc, %entry
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
%x1 = load i32, i32* %arrayidx1, align 4
%arrayidx1 = getelementptr inbounds [10 x i32], ptr @known_constant, i64 0, i64 %iv.0
%x1 = load i32, ptr %arrayidx1, align 4
%cmp = icmp eq i32 %x1, undef
br i1 %cmp, label %if.then, label %for.inc

Expand All @@ -65,8 +65,8 @@ entry:

for.body:
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
%x1 = load i32, i32* %arrayidx1, align 4
%arrayidx1 = getelementptr inbounds [10 x i32], ptr @known_constant, i64 0, i64 %iv.0
%x1 = load i32, ptr %arrayidx1, align 4
switch i32 %x1, label %l1 [
]

Expand All @@ -90,9 +90,8 @@ entry:
for.body:
%phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
%vec_phi = phi <4 x i32> [ <i32 0, i32 0, i32 0, i32 0>, %entry ], [ %r, %for.body ]
%arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %phi
%bc = bitcast i32* %arrayidx to <4 x i32>*
%x = load <4 x i32>, < 4 x i32>* %bc, align 4
%arrayidx = getelementptr inbounds [10 x i32], ptr @known_constant, i64 0, i64 %phi
%x = load <4 x i32>, < 4 x i32>* %arrayidx, align 4
%r = add <4 x i32> %x, %vec_phi
%inc = add nuw nsw i64 %phi, 1
%cmp = icmp ult i64 %inc, 999
Expand All @@ -110,8 +109,7 @@ for.body:
br i1 true, label %for.inc, label %if.then

if.then:
%arraydecay = getelementptr inbounds [1 x i32], [1 x i32]* null, i64 0, i64 0
%x = ptrtoint i32* %arraydecay to i64
%x = ptrtoint ptr null to i64
br label %for.inc

for.inc:
Expand All @@ -130,8 +128,7 @@ for.body.lr.ph:

for.body:
%iv = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%offset = getelementptr inbounds float, float* null, i32 3
%bc = bitcast float* %offset to i64*
%offset = getelementptr inbounds float, ptr null, i32 3
%inc = add nuw nsw i32 %iv, 1
br i1 false, label %for.body, label %exit

Expand All @@ -147,14 +144,14 @@ entry:

for.body:
%iv = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%m = phi i32* [ @i, %entry ], [ %m, %for.inc ]
%m = phi ptr [ @i, %entry ], [ %m, %for.inc ]
br i1 undef, label %if.else, label %if.then

if.then:
unreachable

if.else:
%cmp = icmp ult i32* %m, null
%cmp = icmp ult ptr %m, null
br i1 %cmp, label %cond.false, label %for.inc

cond.false:
Expand All @@ -178,8 +175,8 @@ for.body:
br i1 undef, label %for.body2, label %for.inc

for.body2:
%idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
%x = load i32, i32* %idx, align 1
%idx = getelementptr inbounds [10 x i32], ptr @known_constant, i64 0, i64 %iv
%x = load i32, ptr %idx, align 1
br label %for.inc

for.inc:
Expand All @@ -198,8 +195,8 @@ for.header:
br label %for.body

for.body:
%d = phi i32* [ null, %for.header ]
%cmp = icmp eq i32* %d, null
%d = phi ptr [ null, %for.header ]
%cmp = icmp eq ptr %d, null
br i1 undef, label %for.end, label %for.header

for.end:
Expand All @@ -212,9 +209,8 @@ entry:

for.body:
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
%bc = bitcast i32* %arrayidx1 to i64*
%x1 = load i64, i64* %bc, align 4
%arrayidx1 = getelementptr inbounds [10 x i32], ptr @known_constant, i64 0, i64 %iv.0
%x1 = load i64, ptr %arrayidx1, align 4
%x2 = add i64 10, %x1
%iv.1 = add nuw nsw i64 %iv.0, 1
%exitcond = icmp eq i64 %iv.1, 10
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Though @unknown_global is initialized with constant values, we can't consider
; it as a constant, so we shouldn't unroll the loop.
; CHECK-LABEL: @foo
; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv
define i32 @foo(i32* noalias nocapture readonly %src) {
; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], ptr @unknown_global, i64 0, i64 %iv
define i32 @foo(ptr noalias nocapture readonly %src) {
entry:
br label %loop

loop: ; preds = %loop, %entry
%iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
%r = phi i32 [ 0, %entry ], [ %add, %loop ]
%arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
%src_element = load i32, i32* %arrayidx, align 4
%array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @unknown_global, i64 0, i64 %iv
%const_array_element = load i32, i32* %array_const_idx, align 4
%arrayidx = getelementptr inbounds i32, ptr %src, i64 %iv
%src_element = load i32, ptr %arrayidx, align 4
%array_const_idx = getelementptr inbounds [9 x i32], ptr @unknown_global, i64 0, i64 %iv
%const_array_element = load i32, ptr %array_const_idx, align 4
%mul = mul nsw i32 %src_element, %const_array_element
%add = add nsw i32 %mul, %r
%inc = add nuw nsw i64 %iv, 1
Expand All @@ -34,18 +34,18 @@ loop.end: ; preds = %loop
; Similarly, we can't consider 'weak' symbols as a known constant value, so we
; shouldn't unroll the loop.
; CHECK-LABEL: @foo2
; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv
define i32 @foo2(i32* noalias nocapture readonly %src) {
; CHECK: %array_const_idx = getelementptr inbounds [9 x i32], ptr @weak_constant, i64 0, i64 %iv
define i32 @foo2(ptr noalias nocapture readonly %src) {
entry:
br label %loop

loop: ; preds = %loop, %entry
%iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
%r = phi i32 [ 0, %entry ], [ %add, %loop ]
%arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
%src_element = load i32, i32* %arrayidx, align 4
%array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @weak_constant, i64 0, i64 %iv
%const_array_element = load i32, i32* %array_const_idx, align 4
%arrayidx = getelementptr inbounds i32, ptr %src, i64 %iv
%src_element = load i32, ptr %arrayidx, align 4
%array_const_idx = getelementptr inbounds [9 x i32], ptr @weak_constant, i64 0, i64 %iv
%const_array_element = load i32, ptr %array_const_idx, align 4
%mul = mul nsw i32 %src_element, %const_array_element
%add = add nsw i32 %mul, %r
%inc = add nuw nsw i64 %iv, 1
Expand All @@ -62,15 +62,15 @@ loop.end: ; preds = %loop
; clean up almost the entire loop. Make sure that we do not unroll such a loop.
; CHECK-LABEL: @foo3
; CHECK: br i1 %exitcond, label %loop.end, label %loop.header
define i32 @foo3(i32* noalias nocapture readonly %src) {
define i32 @foo3(ptr noalias nocapture readonly %src) {
entry:
br label %loop.header

loop.header:
%iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
%r1 = phi i32 [ 0, %entry ], [ %r3, %loop.latch ]
%arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
%src_element = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %src, i64 %iv
%src_element = load i32, ptr %arrayidx, align 4
%cmp = icmp eq i32 0, %src_element
br i1 %cmp, label %loop.if, label %loop.latch

Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: @branch_folded
; CHECK-NOT: br i1 %
; CHECK: ret i32
define i32 @branch_folded(i32* noalias nocapture readonly %b) {
define i32 @branch_folded(ptr noalias nocapture readonly %b) {
entry:
br label %for.body

for.body: ; preds = %for.inc, %entry
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
%r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
%x1 = load i32, i32* %arrayidx1, align 4
%arrayidx1 = getelementptr inbounds [10 x i32], ptr @known_constant, i64 0, i64 %iv.0
%x1 = load i32, ptr %arrayidx1, align 4
%cmp = icmp eq i32 %x1, 0
%iv.1 = add nuw nsw i64 %iv.0, 1
br i1 %cmp, label %if.then, label %for.inc

if.then: ; preds = %for.body
%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
%x2 = load i32, i32* %arrayidx2, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv.0
%x2 = load i32, ptr %arrayidx2, align 4
%add = add nsw i32 %x2, %r.0
br label %for.inc

Expand All @@ -50,9 +50,9 @@ entry:
br label %while.body

while.body:
%iv.0 = phi i32* [ getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 0), %entry ], [ %iv.1, %while.body ]
%iv.1 = getelementptr inbounds i32, i32* %iv.0, i64 1
%exitcond = icmp eq i32* %iv.1, getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 9)
%iv.0 = phi ptr [ @known_constant, %entry ], [ %iv.1, %while.body ]
%iv.1 = getelementptr inbounds i32, ptr %iv.0, i64 1
%exitcond = icmp eq ptr %iv.1, getelementptr inbounds ([10 x i32], ptr @known_constant, i64 0, i64 9)
br i1 %exitcond, label %loop.exit, label %while.body

loop.exit:
Expand All @@ -62,16 +62,16 @@ loop.exit:
; Check that we don't crash when we analyze ptrtoint cast.
; CHECK-LABEL: @ptrtoint_cast_crash
; CHECK: ret void
define void @ptrtoint_cast_crash(i8 * %a) {
define void @ptrtoint_cast_crash(ptr %a) {
entry:
%limit = getelementptr i8, i8* %a, i64 512
%limit = getelementptr i8, ptr %a, i64 512
br label %loop.body

loop.body:
%iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
%cast = ptrtoint i8* %iv.0 to i64
%iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
%exitcond = icmp ne i8* %iv.1, %limit
%iv.0 = phi ptr [ %a, %entry ], [ %iv.1, %loop.body ]
%cast = ptrtoint ptr %iv.0 to i64
%iv.1 = getelementptr inbounds i8, ptr %iv.0, i64 1
%exitcond = icmp ne ptr %iv.1, %limit
br i1 %exitcond, label %loop.body, label %loop.exit

loop.exit:
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,24 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; instruction is simplified, the other operand might become dead.
; In this test we have::
; for i in 1..10:
; r += A[i] * B[i]
; r += A[i] * B[i]
; A[i] is 0 at almost every iteration, so there is no need to load B[i] at
; all.


; CHECK-LABEL: @unroll_dce
; CHECK-NOT: br i1 %exitcond, label %for.end, label %for.body
define i32 @unroll_dce(i32* noalias nocapture readonly %b) {
define i32 @unroll_dce(ptr noalias nocapture readonly %b) {
entry:
br label %for.body

for.body: ; preds = %for.body, %entry
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
%r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ]
%arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
%x1 = load i32, i32* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
%x2 = load i32, i32* %arrayidx2, align 4
%arrayidx1 = getelementptr inbounds [10 x i32], ptr @known_constant, i64 0, i64 %iv.0
%x1 = load i32, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv.0
%x2 = load i32, ptr %arrayidx2, align 4
%mul = mul i32 %x1, %x2
%r.1 = add i32 %mul, %r.0
%iv.1 = add nuw nsw i64 %iv.0, 1
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: @not_simplified_geps
; CHECK: br i1 %
; CHECK: ret void
define void @not_simplified_geps(i32* noalias %b, i32* noalias %c) {
define void @not_simplified_geps(ptr noalias %b, ptr noalias %c) {
entry:
br label %for.body

for.body:
%iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
%arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv.0
%x1 = load i32, i32* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %c, i64 %iv.0
store i32 %x1, i32* %arrayidx2, align 4
%arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %iv.0
%x1 = load i32, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv.0
store i32 %x1, ptr %arrayidx2, align 4
%iv.1 = add nuw nsw i64 %iv.0, 1
%exitcond = icmp eq i64 %iv.1, 10
br i1 %exitcond, label %for.end, label %for.body
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@
; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop-unroll' -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=20 -unroll-max-percent-threshold-boost=100 | FileCheck %s -check-prefix=TEST3

; If the absolute threshold is too low, we should not unroll:
; TEST1: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
; TEST1: %array_const_idx = getelementptr inbounds [9 x i32], ptr @known_constant, i64 0, i64 %iv

; Otherwise, we should:
; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
; TEST2-NOT: %array_const_idx = getelementptr inbounds [9 x i32], ptr @known_constant, i64 0, i64 %iv

; If we do not boost threshold, the unroll will not happen:
; TEST3: %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
; TEST3: %array_const_idx = getelementptr inbounds [9 x i32], ptr @known_constant, i64 0, i64 %iv

; And check that we don't crash when we're not allowed to do any analysis.
; RUN: opt < %s -passes=loop-unroll -unroll-max-iteration-count-to-analyze=0 -disable-output
Expand All @@ -46,17 +46,17 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16

define i32 @foo(i32* noalias nocapture readonly %src) {
define i32 @foo(ptr noalias nocapture readonly %src) {
entry:
br label %loop

loop: ; preds = %loop, %entry
%iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
%r = phi i32 [ 0, %entry ], [ %add, %loop ]
%arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
%src_element = load i32, i32* %arrayidx, align 4
%array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
%const_array_element = load i32, i32* %array_const_idx, align 4
%arrayidx = getelementptr inbounds i32, ptr %src, i64 %iv
%src_element = load i32, ptr %arrayidx, align 4
%array_const_idx = getelementptr inbounds [9 x i32], ptr @known_constant, i64 0, i64 %iv
%const_array_element = load i32, ptr %array_const_idx, align 4
%mul = mul nsw i32 %src_element, %const_array_element
%add = add nsw i32 %mul, %r
%inc = add nuw nsw i64 %iv, 1
Expand Down
104 changes: 52 additions & 52 deletions llvm/test/Transforms/LoopUnroll/full-unroll-keep-first-exit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@
; RUN: opt -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' < %s | FileCheck %s

; Unroll twice, with first loop exit kept
define void @s32_max1(i32 %n, i32* %p) {
define void @s32_max1(i32 %n, ptr %p) {
;
; CHECK-LABEL: @s32_max1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 1
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: br label [[DO_END]]
; CHECK: do.end:
; CHECK-NEXT: ret void
Expand All @@ -28,8 +28,8 @@ entry:

do.body:
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp = icmp slt i32 %i.0, %add
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
Expand All @@ -39,24 +39,24 @@ do.end:
}

; Unroll thrice, with first loop exit kept
define void @s32_max2(i32 %n, i32* %p) {
define void @s32_max2(i32 %n, ptr %p) {
;
; CHECK-LABEL: @s32_max2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 2
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[INC_1:%.*]] = add i32 [[INC]], 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
; CHECK-NEXT: store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[P]], i32 [[INC_1]]
; CHECK-NEXT: store i32 [[INC_1]], ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT: br label [[DO_END]]
; CHECK: do.end:
; CHECK-NEXT: ret void
Expand All @@ -67,8 +67,8 @@ entry:

do.body:
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp = icmp slt i32 %i.0, %add
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
Expand All @@ -78,16 +78,16 @@ do.end:
}

; Should not be unrolled
define void @s32_maxx(i32 %n, i32 %x, i32* %p) {
define void @s32_maxx(i32 %n, i32 %x, ptr %p) {
;
; CHECK-LABEL: @s32_maxx(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[N:%.*]]
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
Expand All @@ -100,8 +100,8 @@ entry:

do.body:
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp = icmp slt i32 %i.0, %add
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
Expand All @@ -111,7 +111,7 @@ do.end:
}

; Should not be unrolled
define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, ptr %p) {
;
; CHECK-LABEL: @s32_max2_unpredictable_exit(
; CHECK-NEXT: entry:
Expand All @@ -122,8 +122,8 @@ define void @s32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_0]], [[X:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_END:%.*]], label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP1]], label [[DO_BODY]], label [[DO_END]]
Expand All @@ -140,8 +140,8 @@ do.body:
br i1 %cmp, label %do.end, label %if.end ; unpredictable

if.end:
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp1 = icmp slt i32 %i.0, %add
br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
Expand All @@ -151,21 +151,21 @@ do.end:
}

; Unroll twice, with first loop exit kept
define void @u32_max1(i32 %n, i32* %p) {
define void @u32_max1(i32 %n, ptr %p) {
;
; CHECK-LABEL: @u32_max1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 1
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: br label [[DO_END]]
; CHECK: do.end:
; CHECK-NEXT: ret void
Expand All @@ -176,8 +176,8 @@ entry:

do.body:
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp = icmp ult i32 %i.0, %add
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 1 times
Expand All @@ -187,24 +187,24 @@ do.end:
}

; Unroll thrice, with first loop exit kept
define void @u32_max2(i32 %n, i32* %p) {
define void @u32_max2(i32 %n, ptr %p) {
;
; CHECK-LABEL: @u32_max2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[N:%.*]], 2
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[N]]
; CHECK-NEXT: store i32 [[N]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC:%.*]] = add i32 [[N]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[N]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY_1:%.*]], label [[DO_END:%.*]]
; CHECK: do.body.1:
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[P]], i32 [[INC]]
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[INC_1:%.*]] = add i32 [[INC]], 1
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, i32* [[P]], i32 [[INC_1]]
; CHECK-NEXT: store i32 [[INC_1]], i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[P]], i32 [[INC_1]]
; CHECK-NEXT: store i32 [[INC_1]], ptr [[ARRAYIDX_2]], align 4
; CHECK-NEXT: br label [[DO_END]]
; CHECK: do.end:
; CHECK-NEXT: ret void
Expand All @@ -215,8 +215,8 @@ entry:

do.body:
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp = icmp ult i32 %i.0, %add
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or 2 times
Expand All @@ -226,16 +226,16 @@ do.end:
}

; Should not be unrolled
define void @u32_maxx(i32 %n, i32 %x, i32* %p) {
define void @u32_maxx(i32 %n, i32 %x, ptr %p) {
;
; CHECK-LABEL: @u32_maxx(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[N:%.*]]
; CHECK-NEXT: br label [[DO_BODY:%.*]]
; CHECK: do.body:
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[INC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_BODY]], label [[DO_END:%.*]]
Expand All @@ -248,8 +248,8 @@ entry:

do.body:
%i.0 = phi i32 [ %n, %entry ], [ %inc, %do.body ]
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp = icmp ult i32 %i.0, %add
br i1 %cmp, label %do.body, label %do.end ; taken either 0 or x times
Expand All @@ -259,7 +259,7 @@ do.end:
}

; Should not be unrolled
define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, ptr %p) {
;
; CHECK-LABEL: @u32_max2_unpredictable_exit(
; CHECK-NEXT: entry:
Expand All @@ -270,8 +270,8 @@ define void @u32_max2_unpredictable_exit(i32 %n, i32 %x, i32* %p) {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_0]], [[X:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[DO_END:%.*]], label [[IF_END]]
; CHECK: if.end:
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, i32* [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[I_0]]
; CHECK-NEXT: store i32 [[I_0]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add i32 [[I_0]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_0]], [[ADD]]
; CHECK-NEXT: br i1 [[CMP1]], label [[DO_BODY]], label [[DO_END]]
Expand All @@ -288,8 +288,8 @@ do.body:
br i1 %cmp, label %do.end, label %if.end ; unpredictable

if.end:
%arrayidx = getelementptr i32, i32* %p, i32 %i.0
store i32 %i.0, i32* %arrayidx, align 4
%arrayidx = getelementptr i32, ptr %p, i32 %i.0
store i32 %i.0, ptr %arrayidx, align 4
%inc = add i32 %i.0, 1
%cmp1 = icmp ult i32 %i.0, %add
br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
Expand Down
100 changes: 44 additions & 56 deletions llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,23 @@ define i1 @test_latch() {
; CHECK-NEXT: start:
; CHECK-NEXT: [[A1:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT: [[A2:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT: [[A1_0:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0
; CHECK-NEXT: store i64 -5015437470765251660, i64* [[A1_0]], align 8
; CHECK-NEXT: [[A1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A1_1]], align 8
; CHECK-NEXT: [[A2_0:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0
; CHECK-NEXT: store i64 -5015437470765251660, i64* [[A2_0]], align 8
; CHECK-NEXT: [[A2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A2_1]], align 8
; CHECK-NEXT: store i64 -5015437470765251660, ptr [[A1]], align 8
; CHECK-NEXT: [[A1_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A1]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, ptr [[A1_1]], align 8
; CHECK-NEXT: store i64 -5015437470765251660, ptr [[A2]], align 8
; CHECK-NEXT: [[A2_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A2]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, ptr [[A2_1]], align 8
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[A1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[A2]], align 8
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1
; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8
; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A1]], i64 0, i64 1
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A2]], i64 0, i64 1
; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, ptr [[GEP1_1]], align 8
; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, ptr [[GEP2_1]], align 8
; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LATCH_1:%.*]], label [[EXIT]]
; CHECK: latch.1:
Expand All @@ -43,22 +39,20 @@ define i1 @test_latch() {
start:
%a1 = alloca [2 x i64], align 8
%a2 = alloca [2 x i64], align 8
%a1.0 = getelementptr inbounds [2 x i64], [2 x i64]* %a1, i64 0, i64 0
store i64 -5015437470765251660, i64* %a1.0, align 8
%a1.1 = getelementptr inbounds [2 x i64], [2 x i64]* %a1, i64 0, i64 1
store i64 -8661621401413125213, i64* %a1.1, align 8
%a2.0 = getelementptr inbounds [2 x i64], [2 x i64]* %a2, i64 0, i64 0
store i64 -5015437470765251660, i64* %a2.0, align 8
%a2.1 = getelementptr inbounds [2 x i64], [2 x i64]* %a2, i64 0, i64 1
store i64 -8661621401413125213, i64* %a2.1, align 8
store i64 -5015437470765251660, ptr %a1, align 8
%a1.1 = getelementptr inbounds [2 x i64], ptr %a1, i64 0, i64 1
store i64 -8661621401413125213, ptr %a1.1, align 8
store i64 -5015437470765251660, ptr %a2, align 8
%a2.1 = getelementptr inbounds [2 x i64], ptr %a2, i64 0, i64 1
store i64 -8661621401413125213, ptr %a2.1, align 8
br label %loop

loop:
%iv = phi i64 [ 0, %start ], [ %iv.next, %latch ]
%gep1 = getelementptr inbounds [2 x i64], [2 x i64]* %a1, i64 0, i64 %iv
%gep2 = getelementptr inbounds [2 x i64], [2 x i64]* %a2, i64 0, i64 %iv
%load1 = load i64, i64* %gep1, align 8
%load2 = load i64, i64* %gep2, align 8
%gep1 = getelementptr inbounds [2 x i64], ptr %a1, i64 0, i64 %iv
%gep2 = getelementptr inbounds [2 x i64], ptr %a2, i64 0, i64 %iv
%load1 = load i64, ptr %gep1, align 8
%load2 = load i64, ptr %gep2, align 8
%exitcond2 = icmp eq i64 %load1, %load2
br i1 %exitcond2, label %latch, label %exit

Expand All @@ -77,31 +71,27 @@ define i1 @test_non_latch() {
; CHECK-NEXT: start:
; CHECK-NEXT: [[A1:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT: [[A2:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT: [[A1_0:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0
; CHECK-NEXT: store i64 -5015437470765251660, i64* [[A1_0]], align 8
; CHECK-NEXT: [[A1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A1_1]], align 8
; CHECK-NEXT: [[A2_0:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0
; CHECK-NEXT: store i64 -5015437470765251660, i64* [[A2_0]], align 8
; CHECK-NEXT: [[A2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A2_1]], align 8
; CHECK-NEXT: store i64 -5015437470765251660, ptr [[A1]], align 8
; CHECK-NEXT: [[A1_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A1]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, ptr [[A1_1]], align 8
; CHECK-NEXT: store i64 -5015437470765251660, ptr [[A2]], align 8
; CHECK-NEXT: [[A2_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A2]], i64 0, i64 1
; CHECK-NEXT: store i64 -8661621401413125213, ptr [[A2_1]], align 8
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: br label [[LATCH:%.*]]
; CHECK: latch:
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8
; CHECK-NEXT: [[LOAD1:%.*]] = load i64, ptr [[A1]], align 8
; CHECK-NEXT: [[LOAD2:%.*]] = load i64, ptr [[A2]], align 8
; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]]
; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]]
; CHECK: loop.1:
; CHECK-NEXT: br label [[LATCH_1:%.*]]
; CHECK: latch.1:
; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1
; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8
; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8
; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A1]], i64 0, i64 1
; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], ptr [[A2]], i64 0, i64 1
; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, ptr [[GEP1_1]], align 8
; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, ptr [[GEP2_1]], align 8
; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]]
; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2:%.*]], label [[EXIT]]
; CHECK: loop.2:
Expand All @@ -115,14 +105,12 @@ define i1 @test_non_latch() {
start:
%a1 = alloca [2 x i64], align 8
%a2 = alloca [2 x i64], align 8
%a1.0 = getelementptr inbounds [2 x i64], [2 x i64]* %a1, i64 0, i64 0
store i64 -5015437470765251660, i64* %a1.0, align 8
%a1.1 = getelementptr inbounds [2 x i64], [2 x i64]* %a1, i64 0, i64 1
store i64 -8661621401413125213, i64* %a1.1, align 8
%a2.0 = getelementptr inbounds [2 x i64], [2 x i64]* %a2, i64 0, i64 0
store i64 -5015437470765251660, i64* %a2.0, align 8
%a2.1 = getelementptr inbounds [2 x i64], [2 x i64]* %a2, i64 0, i64 1
store i64 -8661621401413125213, i64* %a2.1, align 8
store i64 -5015437470765251660, ptr %a1, align 8
%a1.1 = getelementptr inbounds [2 x i64], ptr %a1, i64 0, i64 1
store i64 -8661621401413125213, ptr %a1.1, align 8
store i64 -5015437470765251660, ptr %a2, align 8
%a2.1 = getelementptr inbounds [2 x i64], ptr %a2, i64 0, i64 1
store i64 -8661621401413125213, ptr %a2.1, align 8
br label %loop

loop:
Expand All @@ -132,10 +120,10 @@ loop:

latch:
%iv.next = add nuw nsw i64 %iv, 1
%gep1 = getelementptr inbounds [2 x i64], [2 x i64]* %a1, i64 0, i64 %iv
%gep2 = getelementptr inbounds [2 x i64], [2 x i64]* %a2, i64 0, i64 %iv
%load1 = load i64, i64* %gep1, align 8
%load2 = load i64, i64* %gep2, align 8
%gep1 = getelementptr inbounds [2 x i64], ptr %a1, i64 0, i64 %iv
%gep2 = getelementptr inbounds [2 x i64], ptr %a2, i64 0, i64 %iv
%load1 = load i64, ptr %gep1, align 8
%load2 = load i64, ptr %gep2, align 8
%exitcond2 = icmp eq i64 %load1, %load2
br i1 %exitcond2, label %loop, label %exit

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
;; Check that we don't emit expensive instructions to compute trip
;; counts when unrolling loops.

define i32 @test(i64 %v12, i8* %array, i64* %loc) {
define i32 @test(i64 %v12, ptr %array, ptr %loc) {
; CHECK-LABEL: @test(
; CHECK-NOT: udiv
entry:
%step = load i64, i64* %loc, !range !0
%step = load i64, ptr %loc, !range !0
br label %loop

loop: ; preds = %entry, %loop
%k.015 = phi i64 [ %v15, %loop ], [ %v12, %entry ]
%v14 = getelementptr inbounds i8, i8* %array, i64 %k.015
store i8 0, i8* %v14
%v14 = getelementptr inbounds i8, ptr %array, i64 %k.015
store i8 0, ptr %v14
%v15 = add nuw nsw i64 %k.015, %step
%v16 = icmp slt i64 %v15, 8193
br i1 %v16, label %loop, label %loopexit
Expand All @@ -29,14 +29,14 @@ loopexit: ; preds = %loop
;; exists in the code and we don't need to expand it once more.
;; Thus, it shouldn't prevent us from unrolling the loop.

define i32 @test2(i64* %loc, i64 %conv7) {
define i32 @test2(ptr %loc, i64 %conv7) {
; CHECK-LABEL: @test2(
; CHECK: udiv
; CHECK: udiv
; CHECK-NOT: udiv
; CHECK-LABEL: for.body
entry:
%rem0 = load i64, i64* %loc, align 8
%rem0 = load i64, ptr %loc, align 8
%ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression
br label %bb1
bb1:
Expand All @@ -54,7 +54,7 @@ for.body:
br i1 %cmp, label %exit, label %for.body
exit:
%rem3 = phi i64 [ %rem2, %for.body ]
store i64 %rem3, i64* %loc, align 8
store i64 %rem3, ptr %loc, align 8
ret i32 0
}

Expand Down
Loading