Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,17 @@ define void @func_21() {
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP10]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
Expand All @@ -45,17 +45,17 @@ define void @func_21() {
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP0]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i32> [[TMP13]], i32 0
; CHECK-NEXT: store i32 [[TMP16]], i32* [[TMP15]], align 4
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP13]], i32 1
; CHECK-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4
; CHECK-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
Expand All @@ -73,10 +73,10 @@ define void @func_21() {
; CHECK: loop:
; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[LV:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[A_PTR:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[LV]] = load i32, i32* [[A_PTR]], align 4
; CHECK-NEXT: [[B_PTR:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[SCALAR_RECUR]], i32* [[B_PTR]], align 4
; CHECK-NEXT: [[A_PTR:%.*]] = getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: [[LV]] = load i32, ptr [[A_PTR]], align 4
; CHECK-NEXT: [[B_PTR:%.*]] = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 [[SCALAR_RECUR]], ptr [[B_PTR]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]]
Expand All @@ -89,10 +89,10 @@ entry:
loop: ; preds = %loop, %entry
%rec = phi i32 [ 0, %entry], [ %lv, %loop ]
%indvars.iv = phi i64 [ 0, %entry], [ %indvars.iv.next, %loop ]
%A.ptr= getelementptr inbounds [5 x i32], [5 x i32]* @A, i64 0, i64 %indvars.iv
%lv = load i32, i32* %A.ptr, align 4
%B.ptr = getelementptr inbounds [5 x i32], [5 x i32]* @B, i64 0, i64 %indvars.iv
store i32 %rec, i32* %B.ptr, align 4
%A.ptr= getelementptr inbounds [5 x i32], ptr @A, i64 0, i64 %indvars.iv
%lv = load i32, ptr %A.ptr, align 4
%B.ptr = getelementptr inbounds [5 x i32], ptr @B, i64 0, i64 %indvars.iv
store i32 %rec, ptr %B.ptr, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 5
br i1 %exitcond, label %exit, label %loop
Expand Down
811 changes: 398 additions & 413 deletions llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll

Large diffs are not rendered by default.

278 changes: 134 additions & 144 deletions llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll

Large diffs are not rendered by default.

53 changes: 24 additions & 29 deletions llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,15 @@

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

define void @foo(i64* %ptr, i32* %ptr.2) {
define void @foo(ptr %ptr, ptr %ptr.2) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR_21:%.*]] = bitcast i32* [[PTR_2:%.*]] to i8*
; CHECK-NEXT: [[PTR3:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
; CHECK: vector.memcheck:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[PTR_2]], i64 1
; CHECK-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i64, i64* [[PTR]], i64 80
; CHECK-NEXT: [[SCEVGEP45:%.*]] = bitcast i64* [[SCEVGEP4]] to i8*
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[PTR_21]], [[SCEVGEP45]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[PTR3]], [[SCEVGEP2]]
; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[PTR_2:%.*]], i64 4
; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 640
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[PTR_2]], [[UGLYGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[UGLYGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
Expand All @@ -30,36 +26,35 @@ define void @foo(i64* %ptr, i32* %ptr.2) {
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 2
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP0]], 3
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: store i32 [[TMP4]], i32* [[PTR_2]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[TMP6]], i32 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <4 x i64>*
; CHECK-NEXT: store <4 x i64> [[VEC_IND]], <4 x i64>* [[TMP8]], align 8, !alias.scope !3
; CHECK-NEXT: store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
; CHECK-NEXT: store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope !3
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 80, 80
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 80, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 82, [[MIDDLE_BLOCK]] ], [ 2, [[ENTRY]] ], [ 2, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 82, [[MIDDLE_BLOCK]] ], [ 2, [[ENTRY]] ], [ 2, [[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: unreachable
; CHECK: loop:
; CHECK-NEXT: [[CAN_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[CAN_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 4294967295
; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP10]] to i32
; CHECK-NEXT: store i32 [[TMP12]], i32* [[PTR_2]], align 4
; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 [[CAN_IV]]
; CHECK-NEXT: store i64 [[TMP10]], i64* [[GEP_PTR]], align 8
; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP11]], 1
; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], 80
; CHECK-NEXT: [[TMP9:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 4294967295
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32
; CHECK-NEXT: store i32 [[TMP11]], ptr [[PTR_2]], align 4
; CHECK-NEXT: [[GEP_PTR:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[CAN_IV]]
; CHECK-NEXT: store i64 [[TMP9]], ptr [[GEP_PTR]], align 8
; CHECK-NEXT: [[TMP12]] = add nuw nsw i64 [[TMP10]], 1
; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], 80
; CHECK-NEXT: [[CAN_IV_NEXT]] = add nuw nsw i64 [[CAN_IV]], 1
; CHECK-NEXT: br i1 [[TMP14]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
Expand All @@ -71,9 +66,9 @@ loop:
%0 = phi i64 [ 2, %entry ], [ %3, %loop ]
%1 = and i64 %0, 4294967295
%2 = trunc i64 %0 to i32
store i32 %2, i32* %ptr.2
%gep.ptr = getelementptr inbounds i64, i64* %ptr, i64 %can.iv
store i64 %0, i64* %gep.ptr
store i32 %2, ptr %ptr.2
%gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %can.iv
store i64 %0, ptr %gep.ptr
%3 = add nuw nsw i64 %1, 1
%4 = icmp sgt i32 %2, 80
%can.iv.next = add nuw nsw i64 %can.iv, 1
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/LoopVectorize/X86/propagate-metadata.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@
; CHECK-LABEL: @no_propagate_range_metadata(
; CHECK: load <16 x i8>
; CHECK: store <16 x i8>
define void @no_propagate_range_metadata(i8* readonly %first.coerce, i8* readnone %last.coerce, i8* nocapture %result) {
define void @no_propagate_range_metadata(ptr readonly %first.coerce, ptr readnone %last.coerce, ptr nocapture %result) {
for.body.preheader:
br label %for.body

for.body: ; preds = %for.body, %for.body.preheader
%result.addr.05 = phi i8* [ %incdec.ptr, %for.body ], [ %result, %for.body.preheader ]
%first.sroa.0.04 = phi i8* [ %incdec.ptr.i.i.i, %for.body ], [ %first.coerce, %for.body.preheader ]
%0 = load i8, i8* %first.sroa.0.04, align 1, !range !0
store i8 %0, i8* %result.addr.05, align 1
%incdec.ptr.i.i.i = getelementptr inbounds i8, i8* %first.sroa.0.04, i64 1
%incdec.ptr = getelementptr inbounds i8, i8* %result.addr.05, i64 1
%lnot.i = icmp eq i8* %incdec.ptr.i.i.i, %last.coerce
%result.addr.05 = phi ptr [ %incdec.ptr, %for.body ], [ %result, %for.body.preheader ]
%first.sroa.0.04 = phi ptr [ %incdec.ptr.i.i.i, %for.body ], [ %first.coerce, %for.body.preheader ]
%0 = load i8, ptr %first.sroa.0.04, align 1, !range !0
store i8 %0, ptr %result.addr.05, align 1
%incdec.ptr.i.i.i = getelementptr inbounds i8, ptr %first.sroa.0.04, i64 1
%incdec.ptr = getelementptr inbounds i8, ptr %result.addr.05, i64 1
%lnot.i = icmp eq ptr %incdec.ptr.i.i.i, %last.coerce
br i1 %lnot.i, label %for.end.loopexit, label %for.body

for.end.loopexit: ; preds = %for.body
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/X86/ptr-indvar-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ entry:
br label %while.body

while.body:
%p.05 = phi i8* [ %add.ptr, %while.body ], [ null, %entry ]
%p.05 = phi ptr [ %add.ptr, %while.body ], [ null, %entry ]
%p1.addr.04 = phi i128 [ %sub, %while.body ], [ %p1, %entry ]
%add.ptr = getelementptr inbounds i8, i8* %p.05, i32 2
%add.ptr = getelementptr inbounds i8, ptr %p.05, i32 2
%sub = add nsw i128 %p1.addr.04, -2
%tobool = icmp eq i128 %sub, 0
br i1 %tobool, label %while.end, label %while.body
Expand Down
42 changes: 21 additions & 21 deletions llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,28 @@ define void @_Z3fn1v() #0 {
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30>, %vector.ph ], [ [[VEC_IND_NEXT4:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, <16 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP12]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP15]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0
; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
entry:
%0 = load i32, i32* @c, align 4
%0 = load i32, ptr @c, align 4
%cmp34 = icmp sgt i32 %0, 8
br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph: ; preds = %entry
%1 = load i32, i32* @a, align 4
%1 = load i32, ptr @a, align 4
%tobool = icmp eq i32 %1, 0
%2 = load i64, i64* @b, align 8
%2 = load i64, ptr @b, align 8
%mul = mul i64 %2, 4063299859190
%tobool6 = icmp eq i64 %mul, 0
%3 = sext i32 %0 to i64
Expand All @@ -57,25 +57,25 @@ for.body.us: ; preds = %for.body.us.prehead
%indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.cond.cleanup4.us-lcssa.us.us ], [ 8, %for.body.us.preheader ]
%indvars.iv70 = phi i64 [ %indvars.iv.next71, %for.cond.cleanup4.us-lcssa.us.us ], [ 0, %for.body.us.preheader ]
%4 = sub nsw i64 8, %indvars.iv78
%add.ptr.us = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv78
%add.ptr.us = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 %indvars.iv78
%5 = add nsw i64 %4, %indvars.iv70
%arraydecay.us.us.us = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %5, i64 0
%arraydecay.us.us.us = getelementptr inbounds [10 x i32], ptr %add.ptr.us, i64 %5, i64 0
br i1 %tobool6, label %for.body5.us.us.us.preheader, label %for.body5.us.us48.preheader

for.body5.us.us48.preheader: ; preds = %for.body.us
store i32 8, i32* %arraydecay.us.us.us, align 16
store i32 8, ptr %arraydecay.us.us.us, align 16
%indvars.iv.next66 = or i64 %indvars.iv70, 1
%6 = add nsw i64 %4, %indvars.iv.next66
%arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %6, i64 0
store i32 8, i32* %arraydecay.us.us55.1, align 8
%arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], ptr %add.ptr.us, i64 %6, i64 0
store i32 8, ptr %arraydecay.us.us55.1, align 8
br label %for.cond.cleanup4.us-lcssa.us.us

for.body5.us.us.us.preheader: ; preds = %for.body.us
store i32 7, i32* %arraydecay.us.us.us, align 16
store i32 7, ptr %arraydecay.us.us.us, align 16
%indvars.iv.next73 = or i64 %indvars.iv70, 1
%7 = add nsw i64 %4, %indvars.iv.next73
%arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %7, i64 0
store i32 7, i32* %arraydecay.us.us.us.1, align 8
%arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], ptr %add.ptr.us, i64 %7, i64 0
store i32 7, ptr %arraydecay.us.us.us.1, align 8
br label %for.cond.cleanup4.us-lcssa.us.us

for.cond.cleanup4.us-lcssa.us.us: ; preds = %for.body5.us.us48.preheader, %for.body5.us.us.us.preheader
Expand All @@ -97,14 +97,14 @@ for.body: ; preds = %for.body.preheader,
%indvars.iv95 = phi i64 [ %indvars.iv.next96, %for.body ], [ 8, %for.body.preheader ]
%indvars.iv87 = phi i64 [ %indvars.iv.next88, %for.body ], [ 0, %for.body.preheader ]
%8 = sub nsw i64 8, %indvars.iv95
%add.ptr = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv95
%add.ptr = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 %indvars.iv95
%9 = add nsw i64 %8, %indvars.iv87
%arraydecay.us31 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %9, i64 0
store i32 8, i32* %arraydecay.us31, align 16
%arraydecay.us31 = getelementptr inbounds [10 x i32], ptr %add.ptr, i64 %9, i64 0
store i32 8, ptr %arraydecay.us31, align 16
%indvars.iv.next90 = or i64 %indvars.iv87, 1
%10 = add nsw i64 %8, %indvars.iv.next90
%arraydecay.us31.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %10, i64 0
store i32 8, i32* %arraydecay.us31.1, align 8
%arraydecay.us31.1 = getelementptr inbounds [10 x i32], ptr %add.ptr, i64 %10, i64 0
store i32 8, ptr %arraydecay.us31.1, align 8
%indvars.iv.next96 = add nuw nsw i64 %indvars.iv95, 2
%cmp = icmp slt i64 %indvars.iv.next96, %3
%indvars.iv.next88 = add nuw nsw i64 %indvars.iv87, 2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ target triple = "xcore"
;CHECK: entry:
;CHECK-NOT: vector.body
;CHECK-NEXT: br label %do.body
define void @f(i8* nocapture %ptr, i32 %len) {
define void @f(ptr nocapture %ptr, i32 %len) {
entry:
br label %do.body
do.body:
%ptr.addr.0 = phi i8* [ %ptr, %entry ], [ %incdec.ptr, %do.body ]
%ptr.addr.0 = phi ptr [ %ptr, %entry ], [ %incdec.ptr, %do.body ]
%len.addr.0 = phi i32 [ %len, %entry ], [ %dec, %do.body ]
%incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.0, i32 1
store i8 0, i8* %ptr.addr.0, align 1
%incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.0, i32 1
store i8 0, ptr %ptr.addr.0, align 1
%dec = add nsw i32 %len.addr.0, -1
%tobool = icmp eq i32 %len.addr.0, 0
br i1 %tobool, label %do.end, label %do.body
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/Transforms/LoopVectorize/ee-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK-LABEL: @_Z4foo1Pii(
; CHECK-NOT: <4 x i32>
; CHECK: ret
define i32 @_Z4foo1Pii(i32* %A, i32 %n, <2 x i32> %q) #0 {
define i32 @_Z4foo1Pii(ptr %A, i32 %n, <2 x i32> %q) #0 {
entry:
%idx.ext = sext i32 %n to i64
%add.ptr = getelementptr inbounds i32, i32* %A, i64 %idx.ext
%add.ptr = getelementptr inbounds i32, ptr %A, i64 %idx.ext
%cmp3.i = icmp eq i32 %n, 0
br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i

for.body.i: ; preds = %entry, %for.body.i
%__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
%__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
%0 = load i32, i32* %__first.addr.04.i, align 4
%__first.addr.04.i = phi ptr [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
%0 = load i32, ptr %__first.addr.04.i, align 4
%q1 = extractelement <2 x i32> %q, i32 %n
%q2 = add nsw i32 %0, %q1
%add.i = add nsw i32 %q2, %__init.addr.05.i
%incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
%cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
%incdec.ptr.i = getelementptr inbounds i32, ptr %__first.addr.04.i, i64 1
%cmp.i = icmp eq ptr %incdec.ptr.i, %add.ptr
br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i

_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry
Expand Down
1,828 changes: 892 additions & 936 deletions llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll

Large diffs are not rendered by default.

690 changes: 345 additions & 345 deletions llvm/test/Transforms/LoopVectorize/global_alias.ll

Large diffs are not rendered by default.

30 changes: 15 additions & 15 deletions llvm/test/Transforms/LoopVectorize/induction-ptrcasts.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@

@f = external dso_local global i32, align 4

define void @int_iv_based_on_pointer_iv(i8* %A) {
define void @int_iv_based_on_pointer_iv(ptr %A) {
; VF1-LABEL: @int_iv_based_on_pointer_iv(
; VF1: vector.body:
; VF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; VF1-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
; VF1-NEXT: [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF1-NEXT: [[INDUCTION3:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[INDUCTION]]
; VF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[INDUCTION3]]
; VF1-NEXT: store i8 0, i8* [[TMP7]], align 1
; VF1-NEXT: store i8 0, i8* [[TMP8]], align 1
; VF1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDUCTION]]
; VF1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDUCTION3]]
; VF1-NEXT: store i8 0, ptr [[TMP7]], align 1
; VF1-NEXT: store i8 0, ptr [[TMP8]], align 1
; VF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF1-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]],
; VF1-NEXT: br i1 [[TMP13]], label %middle.block, label %vector.body
Expand All @@ -25,10 +25,10 @@ define void @int_iv_based_on_pointer_iv(i8* %A) {
; VF2-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
; VF2-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
; VF2-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 4
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP3]]
; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP4]]
; VF2-NEXT: store i8 0, i8* [[TMP9]], align 1
; VF2-NEXT: store i8 0, i8* [[TMP10]], align 1
; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
; VF2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
; VF2-NEXT: store i8 0, ptr [[TMP9]], align 1
; VF2-NEXT: store i8 0, ptr [[TMP10]], align 1
; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; VF2-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]],
; VF2-NEXT: br i1 [[TMP14]], label %middle.block, label %vector.body
Expand All @@ -38,12 +38,12 @@ entry:

loop:
%iv.int = phi i64 [ 0, %entry ], [ %iv.int.next, %loop ]
%iv.ptr = phi i32* [ null, %entry ], [ %iv.ptr.next, %loop ]
%iv.ptr.next = getelementptr inbounds i32, i32* %iv.ptr, i64 1
%gep.A = getelementptr inbounds i8, i8* %A, i64 %iv.int
store i8 0, i8* %gep.A
%iv.int.next = ptrtoint i32* %iv.ptr.next to i64
%sub.ptr.sub = sub i64 ptrtoint (i32* @f to i64), %iv.int.next
%iv.ptr = phi ptr [ null, %entry ], [ %iv.ptr.next, %loop ]
%iv.ptr.next = getelementptr inbounds i32, ptr %iv.ptr, i64 1
%gep.A = getelementptr inbounds i8, ptr %A, i64 %iv.int
store i8 0, ptr %gep.A
%iv.int.next = ptrtoint ptr %iv.ptr.next to i64
%sub.ptr.sub = sub i64 ptrtoint (ptr @f to i64), %iv.int.next
%cmp = icmp sgt i64 %sub.ptr.sub, 0
br i1 %cmp, label %loop, label %exit

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/nsw-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ while.body.lr.ph:
br label %while.body

while.body:
%it.sroa.0.091 = phi i32* [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
%incdec.ptr.i = getelementptr inbounds i32, i32* %it.sroa.0.091, i64 1
%it.sroa.0.091 = phi ptr [ undef, %while.body.lr.ph ], [ %incdec.ptr.i, %while.body ]
%incdec.ptr.i = getelementptr inbounds i32, ptr %it.sroa.0.091, i64 1
%inc32 = add i32 undef, 1 ; <------------- Make sure we don't set NSW flags to the undef.
%cmp.i11 = icmp eq i32* %incdec.ptr.i, undef
%cmp.i11 = icmp eq ptr %incdec.ptr.i, undef
br i1 %cmp.i11, label %while.end, label %while.body

while.end:
Expand Down
174 changes: 86 additions & 88 deletions llvm/test/Transforms/LoopVectorize/pointer-induction.ll

Large diffs are not rendered by default.

30 changes: 15 additions & 15 deletions llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@b = common global i32 0, align 4
@f = common global i32 0, align 4
@a = common global i32 0, align 4
@d = common global i32* null, align 8
@e = common global i32* null, align 8
@d = common global ptr null, align 8
@e = common global ptr null, align 8
@c = common global i32 0, align 4

; CHECK-LABEL: @fn1(
Expand All @@ -30,14 +30,14 @@ for.cond4.preheader: ; preds = %for.cond
br i1 %cmp514, label %for.cond7.preheader.lr.ph, label %for.end26

for.cond7.preheader.lr.ph: ; preds = %for.cond4.preheader
%0 = load i32*, i32** @e, align 8, !tbaa !4
%0 = load ptr, ptr @e, align 8, !tbaa !4
br label %for.cond7.preheader

for.cond7.preheader: ; preds = %for.cond7.preheader.lr.ph, %for.inc23
%y.017 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %inc24, %for.inc23 ]
%i.116 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %i.2.lcssa, %for.inc23 ]
%n.015 = phi i32 [ undef, %for.cond7.preheader.lr.ph ], [ %inc25, %for.inc23 ]
%1 = load i32, i32* @b, align 4, !tbaa !5
%1 = load i32, ptr @b, align 4, !tbaa !5
%tobool11 = icmp eq i32 %1, 0
br i1 %tobool11, label %for.inc23, label %for.body8.lr.ph

Expand All @@ -50,9 +50,9 @@ for.body8: ; preds = %for.body8.lr.ph, %f
%i.213 = phi i32 [ %i.116, %for.body8.lr.ph ], [ 0, %for.inc19 ]
%2 = trunc i64 %indvars.iv19 to i32
%add10 = add i32 %add9, %2
store i32 %add10, i32* @f, align 4, !tbaa !5
store i32 %add10, ptr @f, align 4, !tbaa !5
%idx.ext = sext i32 %add10 to i64
%add.ptr = getelementptr inbounds i32, i32* @a, i64 %idx.ext
%add.ptr = getelementptr inbounds i32, ptr @a, i64 %idx.ext
%tobool129 = icmp eq i32 %i.213, 0
br i1 %tobool129, label %for.inc19, label %for.body13.lr.ph

Expand All @@ -63,10 +63,10 @@ for.body13.lr.ph: ; preds = %for.body8
for.body13: ; preds = %for.body13.lr.ph, %for.body13
%indvars.iv = phi i64 [ %3, %for.body13.lr.ph ], [ %indvars.iv.next, %for.body13 ]
%add.ptr.sum = add i64 %idx.ext, %indvars.iv
%arrayidx = getelementptr inbounds i32, i32* @a, i64 %add.ptr.sum
%4 = load i32, i32* %arrayidx, align 4, !tbaa !5
%arrayidx15 = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
store i32 %4, i32* %arrayidx15, align 4, !tbaa !5
%arrayidx = getelementptr inbounds i32, ptr @a, i64 %add.ptr.sum
%4 = load i32, ptr %arrayidx, align 4, !tbaa !5
%arrayidx15 = getelementptr inbounds i32, ptr %0, i64 %indvars.iv
store i32 %4, ptr %arrayidx15, align 4, !tbaa !5
%indvars.iv.next = add i64 %indvars.iv, 1
%5 = trunc i64 %indvars.iv.next to i32
%tobool12 = icmp eq i32 %5, 0
Expand All @@ -76,17 +76,17 @@ for.cond11.for.inc19_crit_edge: ; preds = %for.body13
br label %for.inc19

for.inc19: ; preds = %for.cond11.for.inc19_crit_edge, %for.body8
%6 = load i32, i32* @c, align 4, !tbaa !5
%6 = load i32, ptr @c, align 4, !tbaa !5
%inc20 = add nsw i32 %6, 1
store i32 %inc20, i32* @c, align 4, !tbaa !5
store i32 %inc20, ptr @c, align 4, !tbaa !5
%indvars.iv.next20 = add i64 %indvars.iv19, 1
%7 = load i32, i32* @b, align 4, !tbaa !5
%7 = load i32, ptr @b, align 4, !tbaa !5
%tobool = icmp eq i32 %7, 0
br i1 %tobool, label %for.cond7.for.inc23_crit_edge, label %for.body8

for.cond7.for.inc23_crit_edge: ; preds = %for.inc19
%add.ptr.lcssa = phi i32* [ %add.ptr, %for.inc19 ]
store i32* %add.ptr.lcssa, i32** @d, align 8, !tbaa !4
%add.ptr.lcssa = phi ptr [ %add.ptr, %for.inc19 ]
store ptr %add.ptr.lcssa, ptr @d, align 8, !tbaa !4
br label %for.inc23

for.inc23: ; preds = %for.cond7.for.inc23_crit_edge, %for.cond7.preheader
Expand Down
301 changes: 203 additions & 98 deletions llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll

Large diffs are not rendered by default.

26 changes: 13 additions & 13 deletions llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

define dso_local void @foo(i8* noalias nocapture readonly %A, i8* noalias nocapture readonly %B, i8* noalias nocapture %C, i32 %N) {
define dso_local void @foo(ptr noalias nocapture readonly %A, ptr noalias nocapture readonly %B, ptr noalias nocapture %C, i32 %N) {
entry:
%cmp6 = icmp eq i32 %N, 0
br i1 %cmp6, label %while.end, label %while.body.preheader
Expand All @@ -19,16 +19,16 @@ while.body.preheader:

while.body:
%N.addr.010 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
%C.addr.09 = phi i8* [ %incdec.ptr4, %while.body ], [ %C, %while.body.preheader ]
%B.addr.08 = phi i8* [ %incdec.ptr1, %while.body ], [ %B, %while.body.preheader ]
%A.addr.07 = phi i8* [ %incdec.ptr, %while.body ], [ %A, %while.body.preheader ]
%incdec.ptr = getelementptr inbounds i8, i8* %A.addr.07, i32 1
%0 = load i8, i8* %A.addr.07, align 1
%incdec.ptr1 = getelementptr inbounds i8, i8* %B.addr.08, i32 1
%1 = load i8, i8* %B.addr.08, align 1
%C.addr.09 = phi ptr [ %incdec.ptr4, %while.body ], [ %C, %while.body.preheader ]
%B.addr.08 = phi ptr [ %incdec.ptr1, %while.body ], [ %B, %while.body.preheader ]
%A.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %A, %while.body.preheader ]
%incdec.ptr = getelementptr inbounds i8, ptr %A.addr.07, i32 1
%0 = load i8, ptr %A.addr.07, align 1
%incdec.ptr1 = getelementptr inbounds i8, ptr %B.addr.08, i32 1
%1 = load i8, ptr %B.addr.08, align 1
%add = add i8 %1, %0
%incdec.ptr4 = getelementptr inbounds i8, i8* %C.addr.09, i32 1
store i8 %add, i8* %C.addr.09, align 1
%incdec.ptr4 = getelementptr inbounds i8, ptr %C.addr.09, i32 1
store i8 %add, ptr %C.addr.09, align 1
%dec = add i32 %N.addr.010, -1
%cmp = icmp eq i32 %dec, 0
br i1 %cmp, label %while.end.loopexit, label %while.body
Expand All @@ -43,7 +43,7 @@ while.end:
; Make sure a loop is successfully vectorized with fold-tail when the backedge
; taken count is constant and used inside the loop. Issue revealed by D76992.
;
define void @reuse_const_btc(i8* %A) optsize {
define void @reuse_const_btc(ptr %A) optsize {
; CHECK-LABEL: @reuse_const_btc
; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, <i32 13, i32 13, i32 13, i32 13>
; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> <i32 12, i32 12, i32 12, i32 12>, <4 x i32> <i32 13, i32 13, i32 13, i32 13>
Expand All @@ -54,7 +54,7 @@ entry:
loop:
%riv = phi i32 [ 13, %entry ], [ %rivMinus1, %merge ]
%sub = sub nuw nsw i32 20, %riv
%arrayidx = getelementptr inbounds i8, i8* %A, i32 %sub
%arrayidx = getelementptr inbounds i8, ptr %A, i32 %sub
%cond0 = icmp eq i32 %riv, 7
br i1 %cond0, label %then, label %else
then:
Expand All @@ -64,7 +64,7 @@ else:
merge:
%blend = phi i32 [ 13, %then ], [ 12, %else ]
%trunc = trunc i32 %blend to i8
store i8 %trunc, i8* %arrayidx, align 1
store i8 %trunc, ptr %arrayidx, align 1
%rivMinus1 = add nuw nsw i32 %riv, -1
%cond = icmp eq i32 %riv, 0
br i1 %cond, label %exit, label %loop
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,53 +9,53 @@
; CHECK-REMARKS-NEXT: remark: {{.*}} interleaved loop (interleaved count: 4)
; CHECK-REMARKS-NOT: remark: {{.*}} vectorized loop

; @VF1-VPlanExe: scalar loop that stores i32 0 to element %indvars.iv of %dst.
; The induction variable enters as 0 from %entry and the loop leaves for
; for.cond.cleanup once %indvars.iv.next equals 15.
; The CHECK lines expect no vector types in vector.body: INDEX advances by 4
; per pass and four separate stores are emitted, each behind a branch on an
; `icmp ule ..., 14` predicate, with the back-edge exiting at INDEX_NEXT == 16.
; NOTE(review): this text interleaves the pre-change (`i32*`) and post-change
; (`ptr`) lines of a diff, and the "Expand All" markers elide part of the
; function, so the IR entry block is not visible here.
define void @VF1-VPlanExe(i32* %dst) {
define void @VF1-VPlanExe(ptr %dst) {
; CHECK-LABEL: @VF1-VPlanExe(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ]
; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[VEC_IV1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[VEC_IV2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IV3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 14
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 14
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 14
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV1]], 14
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV2]], 14
; CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV3]], 14
; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[INDUCTION]]
; CHECK-NEXT: store i32 0, i32* [[TMP4]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP4]]
; CHECK-NEXT: store i32 0, ptr [[TMP5]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; CHECK: pred.store.if4:
; CHECK-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION1]]
; CHECK-NEXT: store i32 0, i32* [[TMP5]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]]
; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]]
; CHECK: pred.store.continue5:
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
; CHECK: pred.store.if6:
; CHECK-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION2]]
; CHECK-NEXT: store i32 0, i32* [[TMP6]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP8]]
; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]]
; CHECK: pred.store.continue7:
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
; CHECK: pred.store.if8:
; CHECK-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION3]]
; CHECK-NEXT: store i32 0, i32* [[TMP7]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP10]]
; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]]
; CHECK: pred.store.continue9:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
Expand All @@ -65,8 +65,8 @@ define void @VF1-VPlanExe(i32* %dst) {
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 0, i32* [[DST_PTR]], align 4
; CHECK-NEXT: [[DST_PTR:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i32 0, ptr [[DST_PTR]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
Expand All @@ -79,20 +79,20 @@ for.cond.cleanup:

for.body:
; Input loop: zero %dst[%indvars.iv], then step the index by one.
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%dst.ptr = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv
store i32 0, i32* %dst.ptr
%dst.ptr = getelementptr inbounds i32, ptr %dst, i64 %indvars.iv
store i32 0, ptr %dst.ptr
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Exit once the stepped index reaches 15.
%exitcond = icmp eq i64 %indvars.iv.next, 15
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; @VF1-VPWidenCanonicalIVRecipeExe: pointer-induction loop. %addr starts at
; %ptr1; each iteration stores double 0.0 through it and steps it forward by
; one double, and the loop exits when the stepped pointer equals %ptr2
; (%ptr1 advanced by 15 doubles in the entry block).
; The CHECK lines expect four separately branched scalar stores per pass of
; vector.body, with INDEX advancing by 4 and the back-edge exiting at
; INDEX_NEXT == 16. In the `ptr` variants of those lines the store addresses
; are i8 GEPs whose offset is the lane index multiplied by 8, and IND_END is
; PTR1 plus 128 bytes.
; NOTE(review): this text interleaves the pre-change (`double*`) and
; post-change (`ptr`) lines of a diff, and the "Expand All" marker elides the
; CHECK lines that define the branch predicates.
define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) {
define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) {
; CHECK-LABEL: @VF1-VPWidenCanonicalIVRecipeExe(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds double, double* [[PTR1:%.*]], i64 15
; CHECK-NEXT: [[PTR2:%.*]] = getelementptr inbounds double, ptr [[PTR1:%.*]], i64 15
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr double, double* [[PTR1]], i64 16
; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR1]], i64 128
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
Expand All @@ -107,59 +107,65 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) {
; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP4]]
; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP5]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP5]]
; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP1]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP7]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP1]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
; CHECK: pred.store.if9:
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP6]]
; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP2]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP9]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP2]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
; CHECK: pred.store.continue10:
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.if11:
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP7]]
; CHECK-NEXT: store double 0.000000e+00, double* [[NEXT_GEP3]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 8
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR1]], i64 [[TMP11]]
; CHECK-NEXT: store double 0.000000e+00, ptr [[NEXT_GEP3]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
; CHECK: pred.store.continue12:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR1]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[PTR1]], [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[ADDR:%.*]] = phi double* [ [[PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: store double 0.000000e+00, double* [[ADDR]], align 8
; CHECK-NEXT: [[PTR]] = getelementptr inbounds double, double* [[ADDR]], i64 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq double* [[PTR]], [[PTR2]]
; CHECK-NEXT: [[ADDR:%.*]] = phi ptr [ [[PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: store double 0.000000e+00, ptr [[ADDR]], align 8
; CHECK-NEXT: [[PTR]] = getelementptr inbounds double, ptr [[ADDR]], i64 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[PTR]], [[PTR2]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
;
entry:
; %ptr2 is the loop's end sentinel: %ptr1 advanced by 15 doubles.
%ptr2 = getelementptr inbounds double, double* %ptr1, i64 15
%ptr2 = getelementptr inbounds double, ptr %ptr1, i64 15
br label %for.body

for.cond.cleanup:
ret void

for.body:
; Store through the current pointer first, then step it and compare the
; stepped value against the sentinel.
%addr = phi double* [ %ptr, %for.body ], [ %ptr1, %entry ]
store double 0.0, double* %addr
%ptr = getelementptr inbounds double, double* %addr, i64 1
%cond = icmp eq double* %ptr, %ptr2
%addr = phi ptr [ %ptr, %for.body ], [ %ptr1, %entry ]
store double 0.0, ptr %addr
%ptr = getelementptr inbounds double, ptr %addr, i64 1
%cond = icmp eq ptr %ptr, %ptr2
br i1 %cond, label %for.cond.cleanup, label %for.body
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-REMARKS: {{.*}}
100 changes: 48 additions & 52 deletions llvm/test/Transforms/LoopVectorize/use-scalar-epilogue-if-tp-fails.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,135 +12,131 @@

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; @basic_loop: count-down byte loop. %buff starts at %ptr and a counter starts
; at %size; each iteration loads the byte one past %buff, stores it at %buff,
; steps %buff forward by one byte and decrements the counter. The loop ends
; when the counter reaches 0, and the last stepped pointer is stored to %pos.
; %ptr0 is loaded from %pos in the header block and has no other use in this
; function.
; The CHECK lines expect a minimum-iteration guard (`icmp ult SIZE, 4`), a
; vector.body performing a <4 x i8> load and store with INDEX advancing by 4,
; and a scalar remainder loop in `body` fed by the bc.resume.val phis.
; NOTE(review): this text interleaves the pre-change (`i8*`) and post-change
; (`ptr`) lines of a diff.
define void @basic_loop(i8* nocapture readonly %ptr, i32 %size, i8** %pos) {
define void @basic_loop(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; CHECK-LABEL: @basic_loop(
; CHECK-NEXT: header:
; CHECK-NEXT: [[PTR0:%.*]] = load i8*, i8** [[POS:%.*]], align 4
; CHECK-NEXT: [[PTR0:%.*]] = load ptr, ptr [[POS:%.*]], align 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[SIZE]], [[N_VEC]]
; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 [[N_VEC]]
; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>*
; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP5]], align 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SIZE]], [[HEADER:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: [[DEC66:%.*]] = phi i32 [ [[DEC:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[BUFF:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[BUFF]], i32 1
; CHECK-NEXT: [[BUFF:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[BUFF]], i32 1
; CHECK-NEXT: [[DEC]] = add nsw i32 [[DEC66]], -1
; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
; CHECK-NEXT: store i8 [[TMP7]], i8* [[BUFF]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: store i8 [[TMP5]], ptr [[BUFF]], align 1
; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: end:
; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store ptr [[INCDEC_PTR_LCSSA]], ptr [[POS]], align 4
; CHECK-NEXT: ret void
;
header:
%ptr0 = load i8*, i8** %pos, align 4
%ptr0 = load ptr, ptr %pos, align 4
br label %body

body:
; Two inductions run in lockstep: a counter going down from %size and a
; pointer going up from %ptr.
%dec66 = phi i32 [ %dec, %body ], [ %size, %header ]
%buff = phi i8* [ %incdec.ptr, %body ], [ %ptr, %header ]
%incdec.ptr = getelementptr inbounds i8, i8* %buff, i32 1
%buff = phi ptr [ %incdec.ptr, %body ], [ %ptr, %header ]
%incdec.ptr = getelementptr inbounds i8, ptr %buff, i32 1
%dec = add nsw i32 %dec66, -1
; Copy the byte at %buff + 1 down into %buff.
%0 = load i8, i8* %incdec.ptr, align 1
store i8 %0, i8* %buff, align 1
%0 = load i8, ptr %incdec.ptr, align 1
store i8 %0, ptr %buff, align 1
%tobool11 = icmp eq i32 %dec, 0
br i1 %tobool11, label %end, label %body

end:
; The stepped pointer is used after the loop, so it is live-out.
store i8* %incdec.ptr, i8** %pos, align 4
store ptr %incdec.ptr, ptr %pos, align 4
ret void
}

; @metadata: same count-down byte loop as the IR written out below describes:
; %buff starts at %ptr, a counter starts at %size, each iteration copies the
; byte one past %buff into %buff, steps %buff by one byte and decrements the
; counter; the last stepped pointer is stored to %pos when the counter hits 0.
; The back-edge branch additionally carries `!llvm.loop !1`.
; NOTE(review): the definition of !1 is not visible in this part of the file;
; confirm which loop hint it encodes before relying on it.
; The CHECK lines expect a minimum-iteration guard (`icmp ult SIZE, 4`), a
; vector.body performing a <4 x i8> load and store with INDEX advancing by 4,
; and a scalar remainder loop in `body`.
; NOTE(review): this text interleaves the pre-change (`i8*`) and post-change
; (`ptr`) lines of a diff.
define void @metadata(i8* nocapture readonly %ptr, i32 %size, i8** %pos) {
define void @metadata(ptr nocapture readonly %ptr, i32 %size, ptr %pos) {
; CHECK-LABEL: @metadata(
; CHECK-NEXT: header:
; CHECK-NEXT: [[PTR0:%.*]] = load i8*, i8** [[POS:%.*]], align 4
; CHECK-NEXT: [[PTR0:%.*]] = load ptr, ptr [[POS:%.*]], align 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SIZE:%.*]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SIZE]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SIZE]], [[N_MOD_VF]]
; CHECK-NEXT: [[IND_END:%.*]] = sub i32 [[SIZE]], [[N_VEC]]
; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, i8* [[PTR:%.*]], i32 [[N_VEC]]
; CHECK-NEXT: [[IND_END1:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i32 [[N_VEC]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[PTR]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[NEXT_GEP]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[NEXT_GEP]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to <4 x i8>*
; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], <4 x i8>* [[TMP5]], align 1
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[SIZE]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SIZE]], [[HEADER:%.*]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i8* [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ [[PTR]], [[HEADER]] ]
; CHECK-NEXT: br label [[BODY:%.*]]
; CHECK: body:
; CHECK-NEXT: [[DEC66:%.*]] = phi i32 [ [[DEC:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[BUFF:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[BUFF]], i32 1
; CHECK-NEXT: [[BUFF:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[BODY]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[BUFF]], i32 1
; CHECK-NEXT: [[DEC]] = add nsw i32 [[DEC66]], -1
; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[INCDEC_PTR]], align 1
; CHECK-NEXT: store i8 [[TMP7]], i8* [[BUFF]], align 1
; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1
; CHECK-NEXT: store i8 [[TMP5]], ptr [[BUFF]], align 1
; CHECK-NEXT: [[TOBOOL11:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT: br i1 [[TOBOOL11]], label [[END]], label [[BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: end:
; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i8* [[INCDEC_PTR_LCSSA]], i8** [[POS]], align 4
; CHECK-NEXT: [[INCDEC_PTR_LCSSA:%.*]] = phi ptr [ [[INCDEC_PTR]], [[BODY]] ], [ [[IND_END1]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store ptr [[INCDEC_PTR_LCSSA]], ptr [[POS]], align 4
; CHECK-NEXT: ret void
;
header:
%ptr0 = load i8*, i8** %pos, align 4
%ptr0 = load ptr, ptr %pos, align 4
br label %body

body:
; Two inductions run in lockstep: a counter going down from %size and a
; pointer going up from %ptr.
%dec66 = phi i32 [ %dec, %body ], [ %size, %header ]
%buff = phi i8* [ %incdec.ptr, %body ], [ %ptr, %header ]
%incdec.ptr = getelementptr inbounds i8, i8* %buff, i32 1
%buff = phi ptr [ %incdec.ptr, %body ], [ %ptr, %header ]
%incdec.ptr = getelementptr inbounds i8, ptr %buff, i32 1
%dec = add nsw i32 %dec66, -1
; Copy the byte at %buff + 1 down into %buff.
%0 = load i8, i8* %incdec.ptr, align 1
store i8 %0, i8* %buff, align 1
%0 = load i8, ptr %incdec.ptr, align 1
store i8 %0, ptr %buff, align 1
%tobool11 = icmp eq i32 %dec, 0
; The loop metadata is attached to the back-edge branch.
br i1 %tobool11, label %end, label %body, !llvm.loop !1

end:
store i8* %incdec.ptr, i8** %pos, align 4
store ptr %incdec.ptr, ptr %pos, align 4
ret void
}

Expand Down