Skip to content

Commit

Permalink
[LV] Add additional cost model tests with inductions and truncates.
Browse files Browse the repository at this point in the history
Add test coverage for additional cases not covered by current tests with
multiple inductions and truncates.
  • Loading branch information
fhahn committed Apr 23, 2024
1 parent ce763bf commit 55fc5eb
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 0 deletions.
73 changes: 73 additions & 0 deletions llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,79 @@ exit:
ret void
}

define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
entry:
br label %loop

loop:
%iv.1 = phi i32 [ 100, %entry ], [ %dec, %loop ]
%iv.2 = phi i32 [ 2048, %entry ], [ %add38, %loop ]
%ptr.iv.1 = phi ptr [ %A, %entry ], [ %outptr.0, %loop ]
%ptr.iv.2 = phi ptr [ %B, %entry ], [ %incdec.ptr36, %loop ]
%ptr.iv.3 = phi ptr [ %B, %entry ], [ %incdec.ptr33, %loop ]
%incdec.ptr33 = getelementptr i8, ptr %ptr.iv.3, i64 1
%0 = load i8, ptr %ptr.iv.3, align 1
%conv34 = zext i8 %0 to i32
%incdec.ptr36 = getelementptr i8, ptr %ptr.iv.2, i64 1
%1 = load i8, ptr %ptr.iv.2, align 1
%conv37 = zext i8 %1 to i32
%add38 = add i32 %conv34, %conv37
%shr42 = lshr i32 %iv.2, 1
%conv43 = trunc i32 %shr42 to i8
store i8 %conv43, ptr %ptr.iv.1, align 1
%dec = add i32 %iv.1, 1
%outptr.0 = getelementptr i8, ptr %ptr.iv.1, i64 2
%cmp30.not = icmp eq i32 %dec, 0
br i1 %cmp30.not, label %exit, label %loop

exit:
ret void
}

define i16 @iv_and_step_trunc() {
entry:
br label %loop

loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rec = phi i16 [ 0, %entry ], [ %rec.next, %loop ]
%iv.next = add i64 %iv, 1
%0 = trunc i64 %iv to i16
%1 = trunc i64 %iv.next to i16
%rec.next = mul i16 %0, %1
%ec = icmp eq i64 %iv, 1
br i1 %ec, label %exit, label %loop

exit:
ret i16 %rec
}

define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
entry:
br label %loop.header

loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp9.not = icmp ule i64 %iv, %y
br i1 %cmp9.not, label %loop.latch, label %if.then

if.then:
%or = or i64 %x, %iv
%gep = getelementptr i32, ptr %A, i64 %iv
%t = trunc i64 %or to i32
store i32 %t, ptr %gep, align 4
br label %loop.latch

loop.latch:
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, 100
br i1 %ec, label %exit, label %loop.header

exit:
ret i32 0
}


attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
Expand Down
111 changes: 111 additions & 0 deletions llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -p loop-vectorize -mtriple=x86_64-apple-macosx -S %s | FileCheck %s

target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
; CHECK-LABEL: define i32 @test_scalar_predicated_cost(
; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[Y]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i64> poison, i64 [[X]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT4]], <8 x i64> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]]
; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]]
; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]]
; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD2]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i64> [[TMP12]] to <8 x i32>
; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32>
; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32>
; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32>
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr [[TMP24]], i32 4, <8 x i1> [[TMP8]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr [[TMP25]], i32 4, <8 x i1> [[TMP9]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]])
; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
; CHECK: loop.header:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT: [[CMP9_NOT:%.*]] = icmp ule i64 [[IV]], [[Y]]
; CHECK-NEXT: br i1 [[CMP9_NOT]], label [[LOOP_LATCH]], label [[IF_THEN:%.*]]
; CHECK: if.then:
; CHECK-NEXT: [[OR:%.*]] = or i64 [[X]], [[IV]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[T:%.*]] = trunc i64 [[OR]] to i32
; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 4
; CHECK-NEXT: br label [[LOOP_LATCH]]
; CHECK: loop.latch:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret i32 0
;
entry:
br label %loop.header

loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%cmp9.not = icmp ule i64 %iv, %y
br i1 %cmp9.not, label %loop.latch, label %if.then

if.then:
%or = or i64 %x, %iv
%gep = getelementptr i32, ptr %A, i64 %iv
%t = trunc i64 %or to i32
store i32 %t, ptr %gep, align 4
br label %loop.latch

loop.latch:
%iv.next = add i64 %iv, 1
%ec = icmp eq i64 %iv, 100
br i1 %ec, label %exit, label %loop.header

exit:
ret i32 0
}

attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" }
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
;.

0 comments on commit 55fc5eb

Please sign in to comment.