68 changes: 34 additions & 34 deletions llvm/test/Analysis/CostModel/X86/arith-fix.ll

Large diffs are not rendered by default.

80 changes: 40 additions & 40 deletions llvm/test/Analysis/CostModel/X86/arith-overflow.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/Analysis/CostModel/X86/arith.ll
Original file line number Diff line number Diff line change
Expand Up @@ -946,9 +946,9 @@ define i32 @mul(i32 %arg) {
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = mul <2 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = mul <4 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I8 = mul <8 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = mul <16 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = mul <32 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = mul <64 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = mul <16 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = mul <32 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = mul <64 x i8> undef, undef
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'mul'
Expand Down Expand Up @@ -1012,9 +1012,9 @@ define i32 @mul(i32 %arg) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I8 = mul <2 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I8 = mul <4 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I8 = mul <8 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16I8 = mul <16 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = mul <32 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = mul <64 x i8> undef, undef
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'mul'
Expand Down
129 changes: 75 additions & 54 deletions llvm/test/Analysis/CostModel/X86/cast.ll

Large diffs are not rendered by default.

414 changes: 250 additions & 164 deletions llvm/test/Analysis/CostModel/X86/extend.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-LABEL: 'umul'
; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; THRU-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; THRU-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'umul'
Expand Down

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

Large diffs are not rendered by default.

86 changes: 60 additions & 26 deletions llvm/test/Analysis/CostModel/X86/min-legal-vector-width.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,21 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512vl,+avx512bw,+avx512dq,-prefer-256-bit | FileCheck %s --check-prefixes=SKX512

define void @zext256() "min-legal-vector-width"="256" {
; VEC256-LABEL: 'zext256'
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; AVX-LABEL: 'zext256'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512VL256-LABEL: 'zext256'
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512VL512-LABEL: 'zext256'
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = zext <8 x i16> undef to <8 x i64>
Expand All @@ -22,6 +30,14 @@ define void @zext256() "min-legal-vector-width"="256" {
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX256-LABEL: 'zext256'
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; SKX256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; SKX256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX512-LABEL: 'zext256'
; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = zext <8 x i32> undef to <8 x i64>
Expand All @@ -40,11 +56,11 @@ define void @zext256() "min-legal-vector-width"="256" {

define void @zext512() "min-legal-vector-width"="512" {
; AVX-LABEL: 'zext512'
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = zext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %B = zext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = zext <16 x i8> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %D = zext <16 x i16> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %E = zext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512VL256-LABEL: 'zext512'
Expand Down Expand Up @@ -88,14 +104,23 @@ define void @zext512() "min-legal-vector-width"="512" {
}

define void @sext256() "min-legal-vector-width"="256" {
; VEC256-LABEL: 'sext256'
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; VEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; VEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; VEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; AVX-LABEL: 'sext256'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512VL256-LABEL: 'sext256'
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; AVX512VL256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512VL512-LABEL: 'sext256'
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
Expand All @@ -106,6 +131,15 @@ define void @sext256() "min-legal-vector-width"="256" {
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; AVX512VL512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX256-LABEL: 'sext256'
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; SKX256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; SKX256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; SKX256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; SKX256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SKX512-LABEL: 'sext256'
; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; SKX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %B = sext <8 x i16> undef to <8 x i64>
Expand All @@ -126,12 +160,12 @@ define void @sext256() "min-legal-vector-width"="256" {

define void @sext512() "min-legal-vector-width"="512" {
; AVX-LABEL: 'sext512'
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = sext <8 x i8> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = sext <8 x i16> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %C = sext <8 x i32> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = sext <16 x i8> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %E = sext <16 x i16> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = sext <32 x i8> undef to <32 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; AVX512VL256-LABEL: 'sext512'
Expand Down Expand Up @@ -220,11 +254,11 @@ define i32 @zext256_vXi1() "min-legal-vector-width"="256" {
; AVX-LABEL: 'zext256_vXi1'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = zext <8 x i1> undef to <8 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = zext <16 x i1> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
Expand Down Expand Up @@ -352,12 +386,12 @@ define i32 @sext256_vXi1() "min-legal-vector-width"="256" {
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32>
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sext i1 undef to i16
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
Expand Down
46 changes: 13 additions & 33 deletions llvm/test/Analysis/CostModel/X86/reduce-mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -203,35 +203,15 @@ define i32 @reduce_i16(i32 %arg) {
}

define i32 @reduce_i8(i32 %arg) {
; SSE2-LABEL: 'reduce_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'reduce_i8'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'reduce_i8'
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
; SSE-LABEL: 'reduce_i8'
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX1-LABEL: 'reduce_i8'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
Expand All @@ -247,10 +227,10 @@ define i32 @reduce_i8(i32 %arg) {
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8 = call i8 @llvm.vector.reduce.mul.v8i8(<8 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16 = call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V32 = call i8 @llvm.vector.reduce.mul.v32i8(<32 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64 = call i8 @llvm.vector.reduce.mul.v64i8(<64 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128 = call i8 @llvm.vector.reduce.mul.v128i8(<128 x i8> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'reduce_i8'
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/X86/rem.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/sitofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ define i32 @sitofp_i16_float() {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = sitofp <2 x i16> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = sitofp <4 x i16> undef to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f32 = sitofp <8 x i16> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v16i16_v16f32 = sitofp <16 x i16> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'sitofp_i16_float'
Expand Down
78 changes: 27 additions & 51 deletions llvm/test/Analysis/CostModel/X86/sse-itoi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
define void @zext_v4i8_to_v4i64(<4 x i8>* %a) {
; SSE2-LABEL: 'zext_v4i8_to_v4i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <4 x i8> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = zext <4 x i8> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down Expand Up @@ -43,7 +43,7 @@ define void @sext_v4i8_to_v4i64(<4 x i8>* %a) {
define void @zext_v4i16_to_v4i64(<4 x i16>* %a) {
; SSE2-LABEL: 'zext_v4i16_to_v4i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <4 x i16> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <4 x i16> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand All @@ -62,7 +62,7 @@ define void @zext_v4i16_to_v4i64(<4 x i16>* %a) {
define void @sext_v4i16_to_v4i64(<4 x i16>* %a) {
; SSE2-LABEL: 'sext_v4i16_to_v4i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i16>, <4 x i16>* %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %2 = sext <4 x i16> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = sext <4 x i16> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand All @@ -80,17 +80,11 @@ define void @sext_v4i16_to_v4i64(<4 x i16>* %a) {


define void @zext_v4i32_to_v4i64(<4 x i32>* %a) {
; SSE2-LABEL: 'zext_v4i32_to_v4i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <4 x i32> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v4i32_to_v4i64'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i32> %1 to <4 x i64>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; CHECK-LABEL: 'zext_v4i32_to_v4i64'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <4 x i32> %1 to <4 x i64>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%1 = load <4 x i32>, <4 x i32>* %a
%2 = zext <4 x i32> %1 to <4 x i64>
Expand All @@ -101,7 +95,7 @@ define void @zext_v4i32_to_v4i64(<4 x i32>* %a) {
define void @sext_v4i32_to_v4i64(<4 x i32>* %a) {
; SSE2-LABEL: 'sext_v4i32_to_v4i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i32>, <4 x i32>* %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %2 = sext <4 x i32> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = sext <4 x i32> %1 to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> %2, <4 x i64>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand All @@ -118,17 +112,11 @@ define void @sext_v4i32_to_v4i64(<4 x i32>* %a) {
}

define void @zext_v16i16_to_v16i32(<16 x i16>* %a) {
; SSE2-LABEL: 'zext_v16i16_to_v16i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a, align 32
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = zext <16 x i16> %1 to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v16i16_to_v16i32'
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a, align 32
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <16 x i16> %1 to <16 x i32>
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; CHECK-LABEL: 'zext_v16i16_to_v16i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = load <16 x i16>, <16 x i16>* %a, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <16 x i16> %1 to <16 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%1 = load <16 x i16>, <16 x i16>* %a
%2 = zext <16 x i16> %1 to <16 x i32>
Expand Down Expand Up @@ -156,17 +144,11 @@ define void @sext_v16i16_to_v16i32(<16 x i16>* %a) {
}

define void @zext_v8i16_to_v8i32(<8 x i16>* %a) {
; SSE2-LABEL: 'zext_v8i16_to_v8i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <8 x i16> %1 to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v8i16_to_v8i32'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <8 x i16> %1 to <8 x i32>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; CHECK-LABEL: 'zext_v8i16_to_v8i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i16>, <8 x i16>* %a, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <8 x i16> %1 to <8 x i32>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%1 = load <8 x i16>, <8 x i16>* %a
%2 = zext <8 x i16> %1 to <8 x i32>
Expand Down Expand Up @@ -228,7 +210,7 @@ define void @sext_v4i16_to_v4i32(<4 x i16>* %a) {
define void @zext_v16i8_to_v16i32(<16 x i8>* %a) {
; SSE2-LABEL: 'zext_v16i8_to_v16i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %2 = zext <16 x i8> %1 to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %2 = zext <16 x i8> %1 to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store <16 x i32> %2, <16 x i32>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down Expand Up @@ -266,7 +248,7 @@ define void @sext_v16i8_to_v16i32(<16 x i8>* %a) {
define void @zext_v8i8_to_v8i32(<8 x i8>* %a) {
; SSE2-LABEL: 'zext_v8i8_to_v8i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <8 x i8>, <8 x i8>* %a, align 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = zext <8 x i8> %1 to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = zext <8 x i8> %1 to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <8 x i32> %2, <8 x i32>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down Expand Up @@ -340,17 +322,11 @@ define void @sext_v4i8_to_v4i32(<4 x i8>* %a) {
}

define void @zext_v16i8_to_v16i16(<16 x i8>* %a) {
; SSE2-LABEL: 'zext_v16i8_to_v16i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %2 = zext <16 x i8> %1 to <16 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; SSE41-LABEL: 'zext_v16i8_to_v16i16'
; SSE41-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <16 x i8> %1 to <16 x i16>
; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; CHECK-LABEL: 'zext_v16i8_to_v16i16'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <16 x i8>, <16 x i8>* %a, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = zext <16 x i8> %1 to <16 x i16>
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <16 x i16> %2, <16 x i16>* undef, align 4
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%1 = load <16 x i8>, <16 x i8>* %a
%2 = zext <16 x i8> %1 to <16 x i16>
Expand Down Expand Up @@ -425,7 +401,7 @@ define void @zext_v4i8_to_v4i16(<4 x i8>* %a) {
define void @sext_v4i8_to_v4i16(<4 x i8>* %a) {
; SSE2-LABEL: 'sext_v4i8_to_v4i16'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = load <4 x i8>, <4 x i8>* %a, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %2 = sext <4 x i8> %1 to <4 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %2 = sext <4 x i8> %1 to <4 x i16>
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i16> %2, <4 x i16>* undef, align 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Analysis/CostModel/X86/uitofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ define i32 @uitofp_i16_float() {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v2i16_v2f32 = uitofp <2 x i16> undef to <2 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_v4i16_v4f32 = uitofp <4 x i16> undef to <4 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v8i16_v8f32 = uitofp <8 x i16> undef to <8 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v16i16_v16f32 = uitofp <16 x i16> undef to <16 x float>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX2-LABEL: 'uitofp_i16_float'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ for.end: ; preds = %if.end, %entry
define void @test7(i64 %start, i64* %inc_ptr) {
; CHECK-LABEL: @test7(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0:!range !.*]]
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]]
; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0
; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: loop.preheader:
Expand Down Expand Up @@ -403,7 +403,7 @@ for.end: ; preds = %if.end, %entry
define void @test8(i64 %start, i64* %inc_ptr) {
; CHECK-LABEL: @test8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG1:!range !.*]]
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG1:![0-9]+]]
; CHECK-NEXT: [[OK:%.*]] = icmp sge i64 [[INC]], 0
; CHECK-NEXT: br i1 [[OK]], label [[LOOP_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: loop.preheader:
Expand Down Expand Up @@ -523,7 +523,7 @@ exit:
define void @test11(i64* %inc_ptr) {
; CHECK-LABEL: @test11(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]]
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]]
; CHECK-NEXT: [[NE_COND:%.*]] = icmp ne i64 [[INC]], 0
; CHECK-NEXT: br i1 [[NE_COND]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
; CHECK: loop.preheader:
Expand Down Expand Up @@ -574,7 +574,7 @@ exit:
define void @test12(i64* %inc_ptr) {
; CHECK-LABEL: @test12(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, [[RNG0]]
; CHECK-NEXT: [[INC:%.*]] = load i64, i64* [[INC_PTR:%.*]], align 8, !range [[RNG0]]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
Expand Down
54 changes: 24 additions & 30 deletions llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,22 @@
;

define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE-LABEL: @loadext_2i8_to_2i64(
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; SSE-NEXT: ret <2 x i64> [[TMP3]]
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
Expand Down Expand Up @@ -407,30 +417,14 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
}

define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = bitcast i16* [[P2]] to <2 x i16>*
; SSE2-NEXT: [[TMP4:%.*]] = load <2 x i16>, <2 x i16>* [[TMP3]], align 1
; SSE2-NEXT: [[TMP5:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; SSE2-NEXT: [[TMP6:%.*]] = sext <2 x i16> [[TMP4]] to <2 x i64>
; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; SSE2-NEXT: [[V12:%.*]] = shufflevector <4 x i64> poison, <4 x i64> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; SSE2-NEXT: [[V31:%.*]] = shufflevector <4 x i64> [[V12]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE2-NEXT: ret <4 x i64> [[V31]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
; SSE-LABEL: @loadext_4i16_to_4i64(
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; SSE-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
Expand Down
54 changes: 24 additions & 30 deletions llvm/test/Transforms/SLPVectorizer/X86/sext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,22 @@
;

define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE-LABEL: @loadext_2i8_to_2i64(
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SSE-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SSE-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; SSE-NEXT: ret <2 x i64> [[TMP3]]
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SLM-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
; SLM-NEXT: ret <2 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
Expand Down Expand Up @@ -407,30 +417,14 @@ define <4 x i32> @loadext_4i16_to_4i32(i16* %p0) {
}

define <4 x i64> @loadext_4i16_to_4i64(i16* %p0) {
; SSE2-LABEL: @loadext_4i16_to_4i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE2-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE2-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <2 x i16>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i16>, <2 x i16>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = bitcast i16* [[P2]] to <2 x i16>*
; SSE2-NEXT: [[TMP4:%.*]] = load <2 x i16>, <2 x i16>* [[TMP3]], align 1
; SSE2-NEXT: [[TMP5:%.*]] = sext <2 x i16> [[TMP2]] to <2 x i64>
; SSE2-NEXT: [[TMP6:%.*]] = sext <2 x i16> [[TMP4]] to <2 x i64>
; SSE2-NEXT: [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; SSE2-NEXT: [[V12:%.*]] = shufflevector <4 x i64> undef, <4 x i64> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSE2-NEXT: [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP6]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
; SSE2-NEXT: [[V31:%.*]] = shufflevector <4 x i64> [[V12]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; SSE2-NEXT: ret <4 x i64> [[V31]]
;
; SLM-LABEL: @loadext_4i16_to_4i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SLM-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SLM-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SLM-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SLM-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SLM-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; SLM-NEXT: ret <4 x i64> [[TMP3]]
; SSE-LABEL: @loadext_4i16_to_4i64(
; SSE-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
; SSE-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 2
; SSE-NEXT: [[P3:%.*]] = getelementptr inbounds i16, i16* [[P0]], i64 3
; SSE-NEXT: [[TMP1:%.*]] = bitcast i16* [[P0]] to <4 x i16>*
; SSE-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 1
; SSE-NEXT: [[TMP3:%.*]] = sext <4 x i16> [[TMP2]] to <4 x i64>
; SSE-NEXT: ret <4 x i64> [[TMP3]]
;
; AVX-LABEL: @loadext_4i16_to_4i64(
; AVX-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[P0:%.*]], i64 1
Expand Down
11 changes: 7 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[TMP3]]
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
Expand Down
11 changes: 7 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/zext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,13 @@
define <2 x i64> @loadext_2i8_to_2i64(i8* %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
; SSE2-NEXT: [[TMP1:%.*]] = bitcast i8* [[P0]] to <2 x i8>*
; SSE2-NEXT: [[TMP2:%.*]] = load <2 x i8>, <2 x i8>* [[TMP1]], align 1
; SSE2-NEXT: [[TMP3:%.*]] = zext <2 x i8> [[TMP2]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[TMP3]]
; SSE2-NEXT: [[I0:%.*]] = load i8, i8* [[P0]], align 1
; SSE2-NEXT: [[I1:%.*]] = load i8, i8* [[P1]], align 1
; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
; SLM-NEXT: [[P1:%.*]] = getelementptr inbounds i8, i8* [[P0:%.*]], i64 1
Expand Down