diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
index 7bf4fbd89b0ee..eb5e892a7b7b2 100644
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -1,6 +1,5 @@
-; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
+; RUN: opt -passes=loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
 
 ; Check that we version this loop with speculating the value 1 for symbolic
 ; strides. This also checks that the symbolic stride information is correctly
@@ -8,50 +7,60 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; vectorize because we couldn't determine the array bounds for the required
 ; memchecks.
 
-; CHECK-LABEL: test
-define void @test(ptr %A, i64 %AStride,
-                  ptr %B, i32 %BStride,
-                  ptr %C, i64 %CStride, i32 %N) {
+define void @test(ptr noalias %A, i64 %AStride, ptr noalias %B, i32 %BStride, ptr noalias %C, i64 %CStride) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr noalias [[A:%.*]], i64 [[ASTRIDE:%.*]], ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]], ptr noalias [[C:%.*]], i64 [[CSTRIDE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[IDENT_CHECK1:%.*]] = icmp ne i32 [[BSTRIDE]], 1
+; CHECK-NEXT: [[IDENT_CHECK2:%.*]] = icmp ne i64 [[CSTRIDE]], 1
+; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ne i64 [[ASTRIDE]], 1
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = or i1 [[IDENT_CHECK1]], [[IDENT_CHECK2]]
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK6]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP5]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = mul nsw <2 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br [[EXIT:label %.*]]
+; CHECK: [[SCALAR_PH]]:
+;
 entry:
-  %cmp13 = icmp eq i32 %N, 0
-  br i1 %cmp13, label %for.end, label %for.body.preheader
-
-; CHECK-DAG: icmp ne i64 %AStride, 1
-; CHECK-DAG: icmp ne i32 %BStride, 1
-; CHECK-DAG: icmp ne i64 %CStride, 1
-; CHECK: or
-; CHECK: or
-; CHECK: br
-
-; CHECK: vector.body
-; CHECK: load <2 x i32>
+  br label %loop
 
-for.body.preheader:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %iv.trunc = trunc i64 %indvars.iv to i32
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+  %iv.trunc = trunc i64 %iv to i32
   %mul = mul i32 %iv.trunc, %BStride
   %mul64 = zext i32 %mul to i64
-  %arrayidx = getelementptr inbounds i32, ptr %B, i64 %mul64
-  %0 = load i32, ptr %arrayidx, align 4
-  %mul2 = mul nsw i64 %indvars.iv, %CStride
-  %arrayidx3 = getelementptr inbounds i32, ptr %C, i64 %mul2
-  %1 = load i32, ptr %arrayidx3, align 4
+  %gep.x = getelementptr inbounds i32, ptr %B, i64 %mul64
+  %0 = load i32, ptr %gep.x, align 4
+  %mul2 = mul nsw i64 %iv, %CStride
+  %gep.c = getelementptr inbounds i32, ptr %C, i64 %mul2
+  %1 = load i32, ptr %gep.c, align 4
   %mul4 = mul nsw i32 %1, %0
-  %mul3 = mul nsw i64 %indvars.iv, %AStride
-  %arrayidx7 = getelementptr inbounds i32, ptr %A, i64 %mul3
-  store i32 %mul4, ptr %arrayidx7, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %N
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
+  %mul3 = mul nsw i64 %iv, %AStride
+  %gep.a = getelementptr inbounds i32, ptr %A, i64 %mul3
+  store i32 %mul4, ptr %gep.a, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %ec = icmp eq i64 %iv.next, 100
+  br i1 %ec, label %exit, label %loop
+
+exit:
   ret void
 }
 
@@ -59,36 +68,57 @@ for.end:
 ; replacing the symbolic stride '%conv'.
 ; PR18480
 
-; CHECK-LABEL: fn1
-; CHECK: load <2 x double>
-
 define void @fn1(ptr noalias %x, ptr noalias %c, double %a) {
+; CHECK-LABEL: define void @fn1(
+; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[C:%.*]], double [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[CONV:%.*]] = fptosi double [[A]] to i32
+; CHECK-NEXT: [[CONV2:%.*]] = add i32 [[CONV]], 4
+; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV2]], 0
+; CHECK-NEXT: br i1 [[CMP8]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
+; CHECK: [[LOOP_PREHEADER]]:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[CONV2]] to i64
+; CHECK-NEXT: br label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[CONV]], 1
+; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP2]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[C]], i64 [[INDEX]]
+; CHECK-NEXT: store <2 x double> [[WIDE_LOAD]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+;
 entry:
   %conv = fptosi double %a to i32
   %conv2 = add i32 %conv, 4
   %cmp8 = icmp sgt i32 %conv2, 0
-  br i1 %cmp8, label %for.body.preheader, label %for.end
-
-for.body.preheader:
-  br label %for.body
+  br i1 %cmp8, label %loop, label %exit
 
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %0 = trunc i64 %indvars.iv to i32
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %0 = trunc i64 %iv to i32
   %mul = mul nsw i32 %0, %conv
-  %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds double, ptr %x, i64 %idxprom
-  %1 = load double, ptr %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds double, ptr %c, i64 %indvars.iv
-  store double %1, ptr %arrayidx3, align 8
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %conv2
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
+  %mul.ext = sext i32 %mul to i64
+  %gep.x = getelementptr inbounds double, ptr %x, i64 %mul.ext
+  %1 = load double, ptr %gep.x, align 8
+  %gep.c = getelementptr inbounds double, ptr %c, i64 %iv
+  store double %1, ptr %gep.c, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %iv.trunc = trunc i64 %iv.next to i32
+  %ec = icmp eq i32 %iv.trunc, %conv2
+  br i1 %ec, label %exit, label %loop
+
+exit:
  ret void
 }