Skip to content

Commit

Permalink
[LV] Scalarize instructions marked scalar after vectorization
Browse files Browse the repository at this point in the history
This patch ensures that we actually scalarize instructions marked scalar after
vectorization. Previously, such instructions may have been vectorized instead.

Differential Revision: https://reviews.llvm.org/D23889

llvm-svn: 282418
  • Loading branch information
mssimpso committed Sep 26, 2016
1 parent 5fa302c commit b764aba
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 7 deletions.
9 changes: 9 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -4506,6 +4506,15 @@ static bool mayDivideByZero(Instruction &I) {
void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// For each instruction in the old loop.
for (Instruction &I : *BB) {

// Scalarize instructions that should remain scalar after vectorization.
if (!(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
isa<DbgInfoIntrinsic>(&I)) &&
Legal->isScalarAfterVectorization(&I)) {
scalarizeInstruction(&I);
continue;
}

switch (I.getOpcode()) {
case Instruction::Br:
// Nothing to do for PHIs and BR, since we already took care of the
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -1,5 +1,6 @@
; RUN: opt < %s -loop-vectorize -S | FileCheck %s

; CHECK: vector.body:
; CHECK: fadd
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
Expand All @@ -12,9 +13,8 @@
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
; CHECK-NEXT: =
; CHECK-NOT: fadd
; CHECK-SAME: >
; CHECK: middle.block

target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-ibm-linux-gnu"
Expand Down
Expand Up @@ -43,7 +43,7 @@ for.end12: ; preds = %for.end, %entry

; CHECK-LABEL: @s173
; CHECK: load <4 x float>, <4 x float>*
; CHECK: add i64 %index, 16000
; CHECK: add nsw i64 %index, 16000
; CHECK: ret i32 0
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/global_alias.ll
Expand Up @@ -387,7 +387,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias08(
; CHECK: sub <4 x i32>
; CHECK: load <4 x i32>
; CHECK: ret

define i32 @noAlias08(i32 %a) #0 {
Expand Down Expand Up @@ -439,7 +439,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias09(
; CHECK: sub <4 x i32>
; CHECK: load <4 x i32>
; CHECK: ret

define i32 @noAlias09(i32 %a) #0 {
Expand Down Expand Up @@ -721,7 +721,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias14(
; CHECK: sub <4 x i32>
; CHECK: load <4 x i32>
; CHECK: ret

define i32 @noAlias14(i32 %a) #0 {
Expand Down
4 changes: 3 additions & 1 deletion llvm/test/Transforms/LoopVectorize/induction_plus.ll
Expand Up @@ -9,7 +9,9 @@ target triple = "x86_64-apple-macosx10.8.0"
;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
;CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
;CHECK: %vec.ind1 = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next2, %vector.body ]
;CHECK: add nsw <4 x i64> %vec.ind, <i64 12, i64 12, i64 12, i64 12>
;CHECK: %[[T1:.+]] = add i64 %index, 0
;CHECK: %[[T2:.+]] = add nsw i64 %[[T1]], 12
;CHECK: getelementptr inbounds [1024 x i32], [1024 x i32]* @array, i64 0, i64 %[[T2]]
;CHECK: %vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
;CHECK: %vec.ind.next2 = add <4 x i32> %vec.ind1, <i32 4, i32 4, i32 4, i32 4>
;CHECK: ret i32
Expand Down
74 changes: 74 additions & 0 deletions llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
@@ -0,0 +1,74 @@
; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -instcombine -S | FileCheck %s
; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=2 -loop-vectorize -S | FileCheck %s --check-prefix=NO-IC

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; CHECK-LABEL: @scalar_after_vectorization_0
;
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %offset.idx = or i64 %index, 1
; CHECK: %[[T2:.+]] = add nuw nsw i64 %offset.idx, %tmp0
; CHECK: %[[T3:.+]] = sub nsw i64 %[[T2]], %x
; CHECK: %[[T4:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T3]]
; CHECK: %[[T5:.+]] = bitcast i32* %[[T4]] to <4 x i32>*
; CHECK: load <4 x i32>, <4 x i32>* %[[T5]], align 4
; CHECK: %[[T6:.+]] = getelementptr i32, i32* %[[T4]], i64 4
; CHECK: %[[T7:.+]] = bitcast i32* %[[T6]] to <4 x i32>*
; CHECK: load <4 x i32>, <4 x i32>* %[[T7]], align 4
; CHECK: br {{.*}}, label %middle.block, label %vector.body
;
; NO-IC-LABEL: @scalar_after_vectorization_0
;
; NO-IC: vector.body:
; NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; NO-IC: %offset.idx = add i64 1, %index
; NO-IC: %[[T2:.+]] = add i64 %offset.idx, 0
; NO-IC: %[[T3:.+]] = add i64 %offset.idx, 4
; NO-IC: %[[T4:.+]] = add nuw nsw i64 %[[T2]], %tmp0
; NO-IC: %[[T5:.+]] = add nuw nsw i64 %[[T3]], %tmp0
; NO-IC: %[[T6:.+]] = sub nsw i64 %[[T4]], %x
; NO-IC: %[[T7:.+]] = sub nsw i64 %[[T5]], %x
; NO-IC: %[[T8:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T6]]
; NO-IC: %[[T9:.+]] = getelementptr inbounds i32, i32* %a, i64 %[[T7]]
; NO-IC: %[[T10:.+]] = getelementptr i32, i32* %[[T8]], i32 0
; NO-IC: %[[T11:.+]] = bitcast i32* %[[T10]] to <4 x i32>*
; NO-IC: load <4 x i32>, <4 x i32>* %[[T11]], align 4
; NO-IC: %[[T12:.+]] = getelementptr i32, i32* %[[T8]], i32 4
; NO-IC: %[[T13:.+]] = bitcast i32* %[[T12]] to <4 x i32>*
; NO-IC: load <4 x i32>, <4 x i32>* %[[T13]], align 4
; NO-IC: br {{.*}}, label %middle.block, label %vector.body
;
; Test input: a two-level loop nest that copies i32 elements from %a to %b.
; Both loops are bottom-tested (the exit compare sits at the end of the body),
; and both induction variables start at 1 and step by 1.
;
; Per inner iteration the IR below performs, in effect:
;   b[j + i*x] = a[j + i*x - x]
;
; The inner loop is the one matched by the CHECK / NO-IC lines of this test:
; they expect the index arithmetic (%tmp1, %tmp2) and the address of the load
; (%tmp3) to show up in vector.body as scalar i64 / i32* instructions feeding
; <4 x i32> loads, rather than as <4 x i64> vector arithmetic.
define void @scalar_after_vectorization_0(i32* noalias %a, i32* noalias %b, i64 %x, i64 %y) {

outer.ph:
br label %outer.body

outer.body:
; %i is the outer induction variable; %tmp0 = i * x is computed once per outer
; iteration and is only read (never redefined) inside the inner loop.
%i = phi i64 [ 1, %outer.ph ], [ %i.next, %inner.end ]
%tmp0 = mul nuw nsw i64 %i, %x
br label %inner.ph

inner.ph:
br label %inner.body

inner.body:
; %j is the inner induction variable.
%j = phi i64 [ 1, %inner.ph ], [ %j.next, %inner.body ]
; %tmp1 = j + i*x is the store index into %b; %tmp2 = %tmp1 - x is the load
; index into %a.
%tmp1 = add nuw nsw i64 %j, %tmp0
%tmp2 = sub nsw i64 %tmp1, %x
%tmp3 = getelementptr inbounds i32, i32* %a, i64 %tmp2
%tmp4 = load i32, i32* %tmp3, align 4
%tmp5 = getelementptr inbounds i32, i32* %b, i64 %tmp1
store i32 %tmp4, i32* %tmp5, align 4
; Advance j and keep looping while the incremented value is (signed) < y.
%j.next = add i64 %j, 1
%cond.j = icmp slt i64 %j.next, %y
br i1 %cond.j, label %inner.body, label %inner.end

inner.end:
; Advance i and keep looping while the incremented value is (signed) < y.
%i.next = add i64 %i, 1
%cond.i = icmp slt i64 %i.next, %y
br i1 %cond.i, label %outer.body, label %outer.end

outer.end:
ret void
}

0 comments on commit b764aba

Please sign in to comment.