40 changes: 20 additions & 20 deletions llvm/test/Analysis/CostModel/X86/extend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,8 @@ define i32 @zext_vXi1() {
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
Expand All @@ -399,8 +399,8 @@ define i32 @zext_vXi1() {
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
Expand All @@ -414,7 +414,7 @@ define i32 @zext_vXi1() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
Expand All @@ -424,8 +424,8 @@ define i32 @zext_vXi1() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
Expand All @@ -439,7 +439,7 @@ define i32 @zext_vXi1() {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = zext i1 undef to i64
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = zext <2 x i1> undef to <2 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = zext <4 x i1> undef to <4 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = zext <8 x i1> undef to <8 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = zext i1 undef to i32
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = zext <2 x i1> undef to <2 x i32>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = zext <4 x i1> undef to <4 x i32>
Expand Down Expand Up @@ -474,8 +474,8 @@ define i32 @zext_vXi1() {
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = zext <2 x i1> undef to <2 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = zext <4 x i1> undef to <4 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = zext <8 x i1> undef to <8 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = zext <16 x i1> undef to <16 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = zext <32 x i1> undef to <32 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = zext i1 undef to i8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = zext <2 x i1> undef to <2 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i8 = zext <4 x i1> undef to <4 x i8>
Expand Down Expand Up @@ -877,8 +877,8 @@ define i32 @sext_vXi1() {
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
Expand All @@ -902,8 +902,8 @@ define i32 @sext_vXi1() {
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
Expand All @@ -917,7 +917,7 @@ define i32 @sext_vXi1() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
Expand All @@ -927,8 +927,8 @@ define i32 @sext_vXi1() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
Expand All @@ -942,7 +942,7 @@ define i32 @sext_vXi1() {
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32>
; AVX512BW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32>
Expand Down Expand Up @@ -977,8 +977,8 @@ define i32 @sext_vXi1() {
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = sext <2 x i1> undef to <2 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i16 = sext <4 x i1> undef to <4 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = sext <8 x i1> undef to <8 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16i16 = sext <16 x i1> undef to <16 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %V32i16 = sext <32 x i1> undef to <32 x i16>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sext i1 undef to i8
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i8 = sext <2 x i1> undef to <2 x i8>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i8 = sext <4 x i1> undef to <4 x i8>
Expand Down
46 changes: 23 additions & 23 deletions llvm/test/Analysis/CostModel/X86/fptosi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
define i32 @fptosi_double_i64(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i64'
Expand All @@ -28,15 +28,15 @@ define i32 @fptosi_double_i64(i32 %arg) {
; AVX-LABEL: 'fptosi_double_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptosi_double_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptosi_double_i64'
Expand Down Expand Up @@ -121,9 +121,9 @@ define i32 @fptosi_double_i16(i32 %arg) {
define i32 @fptosi_double_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_double_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_double_i8'
Expand Down Expand Up @@ -164,10 +164,10 @@ define i32 @fptosi_double_i8(i32 %arg) {
define i32 @fptosi_float_i64(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i64'
Expand All @@ -181,17 +181,17 @@ define i32 @fptosi_float_i64(i32 %arg) {
; AVX-LABEL: 'fptosi_float_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptosi_float_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptosi_float_i64'
Expand Down Expand Up @@ -265,9 +265,9 @@ define i32 @fptosi_float_i16(i32 %arg) {
define i32 @fptosi_float_i8(i32 %arg) {
; SSE2-LABEL: 'fptosi_float_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptosi_float_i8'
Expand Down
58 changes: 29 additions & 29 deletions llvm/test/Analysis/CostModel/X86/fptoui.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
define i32 @fptoui_double_i64(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i64'
Expand All @@ -28,15 +28,15 @@ define i32 @fptoui_double_i64(i32 %arg) {
; AVX-LABEL: 'fptoui_double_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_double_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptoui_double_i64'
Expand All @@ -63,9 +63,9 @@ define i32 @fptoui_double_i64(i32 %arg) {
define i32 @fptoui_double_i32(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i32'
Expand Down Expand Up @@ -149,9 +149,9 @@ define i32 @fptoui_double_i16(i32 %arg) {
define i32 @fptoui_double_i8(i32 %arg) {
; SSE2-LABEL: 'fptoui_double_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_double_i8'
Expand Down Expand Up @@ -192,10 +192,10 @@ define i32 @fptoui_double_i8(i32 %arg) {
define i32 @fptoui_float_i64(i32 %arg) {
; SSE2-LABEL: 'fptoui_float_i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i64'
Expand All @@ -209,17 +209,17 @@ define i32 @fptoui_float_i64(i32 %arg) {
; AVX-LABEL: 'fptoui_float_i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64
; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512F-LABEL: 'fptoui_float_i64'
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'fptoui_float_i64'
Expand Down Expand Up @@ -249,9 +249,9 @@ define i32 @fptoui_float_i64(i32 %arg) {
define i32 @fptoui_float_i32(i32 %arg) {
; SSE2-LABEL: 'fptoui_float_i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i32'
Expand Down Expand Up @@ -335,9 +335,9 @@ define i32 @fptoui_float_i16(i32 %arg) {
define i32 @fptoui_float_i8(i32 %arg) {
; SSE2-LABEL: 'fptoui_float_i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8
; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE42-LABEL: 'fptoui_float_i8'
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/Analysis/CostModel/X86/fround.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ target triple = "x86_64-apple-macosx10.8.0"
define i32 @ceil(i32 %arg) {
; SSE2-LABEL: 'ceil'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
Expand Down Expand Up @@ -96,9 +96,9 @@ define i32 @ceil(i32 %arg) {
define i32 @floor(i32 %arg) {
; SSE2-LABEL: 'floor'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.floor.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.floor.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
Expand Down Expand Up @@ -176,9 +176,9 @@ define i32 @floor(i32 %arg) {
define i32 @nearbyint(i32 %arg) {
; SSE2-LABEL: 'nearbyint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
Expand Down Expand Up @@ -256,9 +256,9 @@ define i32 @nearbyint(i32 %arg) {
define i32 @rint(i32 %arg) {
; SSE2-LABEL: 'rint'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.rint.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.rint.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
Expand Down Expand Up @@ -336,9 +336,9 @@ define i32 @rint(i32 %arg) {
define i32 @trunc(i32 %arg) {
; SSE2-LABEL: 'trunc'
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ for.end: ; preds = %vector.body
ret void

; CORE2: Printing analysis 'Cost Model Analysis' for function 'test1':
; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
; CORE2: Cost Model: Found an estimated cost of 49 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)

; COREI7: Printing analysis 'Cost Model Analysis' for function 'test1':
; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
Expand Down Expand Up @@ -50,7 +50,7 @@ for.end: ; preds = %vector.body
ret void

; CORE2: Printing analysis 'Cost Model Analysis' for function 'test2':
; CORE2: Cost Model: Found an estimated cost of 46 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
; CORE2: Cost Model: Found an estimated cost of 49 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)

; COREI7: Printing analysis 'Cost Model Analysis' for function 'test2':
; COREI7: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Analysis/CostModel/X86/load_store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ define i32 @loads(i32 %arg) {
;CHECK: cost of 3 {{.*}} load
load <3 x i64>, <3 x i64>* undef, align 4

;CHECK: cost of 10 {{.*}} load
;CHECK: cost of 12 {{.*}} load
load <5 x i32>, <5 x i32>* undef, align 4

;CHECK: cost of 10 {{.*}} load
;CHECK: cost of 14 {{.*}} load
load <5 x i64>, <5 x i64>* undef, align 4

ret i32 undef
Expand Down
1,035 changes: 657 additions & 378 deletions llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll

Large diffs are not rendered by default.

36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/X86/shuffle-extract_subvector.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down Expand Up @@ -109,11 +109,11 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
; AVX512-NEXT: Cost Model: Unknown cost for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Analysis/CostModel/X86/sitofp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ define i32 @sitofp_i64_double() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double
; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'sitofp_i64_double'
Expand Down Expand Up @@ -241,8 +241,8 @@ define i32 @sitofp_i64_float() {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float>
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512DQ-LABEL: 'sitofp_i64_float'
Expand Down
335 changes: 189 additions & 146 deletions llvm/test/Analysis/CostModel/X86/vector-extract.ll

Large diffs are not rendered by default.

787 changes: 681 additions & 106 deletions llvm/test/Analysis/CostModel/X86/vector-insert.ll

Large diffs are not rendered by default.

120 changes: 87 additions & 33 deletions llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -455,11 +455,17 @@ define <64 x i8> @var_shift_v64i8(<64 x i8> %a, <64 x i8> %b) {
;

define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
; SSE-LABEL: 'splatvar_shift_v2i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
; SSE2-LABEL: 'splatvar_shift_v2i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
;
; SSE42-LABEL: 'splatvar_shift_v2i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %splat
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
;
; AVX-LABEL: 'splatvar_shift_v2i64'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <2 x i64> undef, i64 %b, i32 0
Expand All @@ -486,11 +492,17 @@ define <2 x i64> @splatvar_shift_v2i64(<2 x i64> %a, i64 %b) {
}

define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
; SSE-LABEL: 'splatvar_shift_v4i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
; SSE2-LABEL: 'splatvar_shift_v4i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
;
; SSE42-LABEL: 'splatvar_shift_v4i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, %splat
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
;
; AVX1-LABEL: 'splatvar_shift_v4i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0
Expand Down Expand Up @@ -535,11 +547,17 @@ define <4 x i64> @splatvar_shift_v4i64(<4 x i64> %a, i64 %b) {
}

define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
; SSE-LABEL: 'splatvar_shift_v8i64'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
; SSE2-LABEL: 'splatvar_shift_v8i64'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i64'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, %splat
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i64'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0
Expand Down Expand Up @@ -584,11 +602,35 @@ define <8 x i64> @splatvar_shift_v8i64(<8 x i64> %a, i64 %b) {
}

define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
; CHECK-LABEL: 'splatvar_shift_v4i32'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
; SSE2-LABEL: 'splatvar_shift_v4i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
;
; SSE42-LABEL: 'splatvar_shift_v4i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
;
; AVX-LABEL: 'splatvar_shift_v4i32'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
;
; XOP-LABEL: 'splatvar_shift_v4i32'
; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; XOP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
; XOP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
;
; AVX512-LABEL: 'splatvar_shift_v4i32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i32> undef, i32 %b, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, %splat
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
;
%insert = insertelement <4 x i32> undef, i32 %b, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
Expand All @@ -597,11 +639,17 @@ define <4 x i32> @splatvar_shift_v4i32(<4 x i32> %a, i32 %b) {
}

define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
; SSE-LABEL: 'splatvar_shift_v8i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
; SSE2-LABEL: 'splatvar_shift_v8i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
;
; SSE42-LABEL: 'splatvar_shift_v8i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, %splat
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
;
; AVX1-LABEL: 'splatvar_shift_v8i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0
Expand Down Expand Up @@ -646,11 +694,17 @@ define <8 x i32> @splatvar_shift_v8i32(<8 x i32> %a, i32 %b) {
}

define <16 x i32> @splatvar_shift_v16i32(<16 x i32> %a, i32 %b) {
; SSE-LABEL: 'splatvar_shift_v16i32'
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
; SSE2-LABEL: 'splatvar_shift_v16i32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
;
; SSE42-LABEL: 'splatvar_shift_v16i32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, %splat
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
;
; AVX1-LABEL: 'splatvar_shift_v16i32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0
Expand Down Expand Up @@ -861,7 +915,7 @@ define <32 x i16> @splatvar_shift_v32i16(<32 x i16> %a, i16 %b) {

define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v16i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %shift = ashr <16 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
Expand Down Expand Up @@ -898,7 +952,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, i8 %b) {

define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v32i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %shift = ashr <32 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
Expand Down Expand Up @@ -953,7 +1007,7 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, i8 %b) {

define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, i8 %b) {
; SSE2-LABEL: 'splatvar_shift_v64i8'
; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
; SSE2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %shift = ashr <64 x i8> %a, %splat
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
Expand Down
138 changes: 105 additions & 33 deletions llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll

Large diffs are not rendered by default.

138 changes: 105 additions & 33 deletions llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll

Large diffs are not rendered by default.

58 changes: 58 additions & 0 deletions llvm/test/Transforms/InstCombine/pr14365.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; int test0(int a) { return (a + (~(a & 0x55555555) + 1)); }
define i32 @test0(i32 %a0) {
; CHECK-LABEL: @test0(
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A0:%.*]], -1431655766
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = and i32 %a0, 1431655765
%2 = xor i32 %1, -1
%3 = add nsw i32 %2, 1
%4 = add nsw i32 %a0, %3
ret i32 %4
}

define <4 x i32> @test0_vec(<4 x i32> %a0) {
; CHECK-LABEL: @test0_vec(
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], <i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766>
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = and <4 x i32> %a0, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
%2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
%3 = add nsw <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
%4 = add nsw <4 x i32> %a0, %3
ret <4 x i32> %4
}

; int test1(int a) { return (a + (~((a >> 1) & 0x55555555) + 1)); }
define i32 @test1(i32 %a0) {
; CHECK-LABEL: @test1(
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A0:%.*]], 1
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1431655765
; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[A0]], [[TMP2]]
; CHECK-NEXT: ret i32 [[TMP3]]
;
%1 = ashr i32 %a0, 1
%2 = and i32 %1, 1431655765
%3 = xor i32 %2, -1
%4 = add nsw i32 %3, 1
%5 = add nsw i32 %a0, %4
ret i32 %5
}

define <4 x i32> @test1_vec(<4 x i32> %a0) {
; CHECK-LABEL: @test1_vec(
; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[A0]], [[TMP2]]
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
;
%1 = ashr <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1>
%2 = and <4 x i32> %1, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
%3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
%4 = add nsw <4 x i32> %3, <i32 1, i32 1, i32 1, i32 1>
%5 = add nsw <4 x i32> %a0, %4
ret <4 x i32> %5
}
66 changes: 44 additions & 22 deletions llvm/test/Transforms/LoopVectorize/X86/interleaving.ll
Original file line number Diff line number Diff line change
@@ -1,37 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine < %s | FileCheck %s --check-prefix=NORMAL
; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=sandybridge < %s | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=haswell < %s | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=slm < %s | FileCheck %s --check-prefix=SLOW
; RUN: opt -S -mtriple=x86_64-pc_linux -loop-vectorize -instcombine -mcpu=atom < %s | FileCheck %s --check-prefix=SLOW

define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b) {
; NORMAL-LABEL: @foo(
; NORMAL-NEXT: entry:
; NORMAL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NORMAL: vector.ph:
; NORMAL-NEXT: br label [[VECTOR_BODY:%.*]]
; NORMAL: vector.body:
; NORMAL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NORMAL-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
; NORMAL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
; NORMAL-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <8 x i32>*
; NORMAL-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
; NORMAL-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; NORMAL-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; NORMAL-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
; NORMAL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; NORMAL-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
; NORMAL-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP5]], align 4
; NORMAL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; NORMAL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; NORMAL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; NORMAL: middle.block:
; NORMAL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; NORMAL: scalar.ph:
; NORMAL-NEXT: br label [[FOR_BODY:%.*]]
; NORMAL: for.cond.cleanup:
; NORMAL-NEXT: ret void
; NORMAL: for.body:
; NORMAL-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !2
; NORMAL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; NORMAL-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1
; NORMAL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
; NORMAL-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; NORMAL-NEXT: [[TMP2:%.*]] = or i64 [[TMP0]], 1
; NORMAL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
; NORMAL-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX3]], align 4
; NORMAL-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], [[TMP1]]
; NORMAL-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
; NORMAL-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX6]], align 4
; NORMAL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; NORMAL-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
; NORMAL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
;
; AVX-LABEL: @foo(
; AVX-NEXT: entry:
; AVX-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; AVX: vector.ph:
; AVX-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX: vector.body:
; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1
; AVX-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
; AVX-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <8 x i32>*
; AVX-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP2]], align 4
; AVX-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; AVX-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[STRIDED_VEC1]], [[STRIDED_VEC]]
; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; AVX-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
; AVX-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP5]], align 4
; AVX-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; AVX-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; AVX-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; AVX: middle.block:
; AVX-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; AVX: scalar.ph:
; AVX-NEXT: br label [[FOR_BODY:%.*]]
; AVX: for.cond.cleanup:
; AVX-NEXT: ret void
; AVX: for.body:
; AVX-NEXT: br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !2
;
; SLOW-LABEL: @foo(
; SLOW-NEXT: entry:
Expand Down
94 changes: 38 additions & 56 deletions llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,77 +18,59 @@ define i32 @matrix_row_col([100 x i32]* nocapture readonly %data, i32 %i, i32 %j
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP4]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP5]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP6]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP7]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP12]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP13]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP22:%.*]] = load i32, i32* [[TMP14]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP15]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[TMP16]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP17]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP18]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <8 x i32> undef, i32 [[TMP19]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <8 x i32> [[TMP27]], i32 [[TMP20]], i32 1
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <8 x i32> [[TMP28]], i32 [[TMP21]], i32 2
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP29]], i32 [[TMP22]], i32 3
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <8 x i32> [[TMP30]], i32 [[TMP23]], i32 4
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <8 x i32> [[TMP31]], i32 [[TMP24]], i32 5
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP25]], i32 6
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <8 x i32> [[TMP33]], i32 [[TMP26]], i32 7
; CHECK-NEXT: [[TMP35:%.*]] = mul nsw <8 x i32> [[TMP34]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP36:%.*]] = add <8 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP37]] = add <8 x i32> [[TMP36]], [[TMP35]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA:%.*]], i64 [[IDXPROM]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[TMP3]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP7]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP8]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP9]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP10]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i32> undef, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i32 1
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP13]], i32 2
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP14]], i32 3
; CHECK-NEXT: [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP18]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP21]] = add <4 x i32> [[TMP20]], [[TMP19]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
; CHECK: middle.block:
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i32> [[TMP37]], <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <8 x i32> [[TMP37]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <8 x i32> [[BIN_RDX]], <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <8 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 96
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP21]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF1]]
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i32> [[BIN_RDX2]], i32 0
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 100, 100
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[ADD7_LCSSA:%.*]] = phi i32 [ [[ADD7:%.*]], [[FOR_BODY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[ADD7_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[SUM_015:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD7]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[IDXPROM]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4, !tbaa !1
; CHECK-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4, !tbaa !1
; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[DATA]], i64 [[INDVARS_IV]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !tbaa !1
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP41]], [[TMP40]]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !tbaa !1
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP25]], [[TMP24]]
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUM_015]], 4
; CHECK-NEXT: [[ADD7]] = add i32 [[ADD]], [[MUL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
Expand Down
16 changes: 9 additions & 7 deletions llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@ define void @_Z10fooConvertPDv4_xS0_S0_PKS_() {
; CHECK-LABEL: @_Z10fooConvertPDv4_xS0_S0_PKS_(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <16 x half> undef, i32 4
; CHECK-NEXT: [[CONV_I_4_I:%.*]] = fpext half [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[CONV_I_4_I]] to i32
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 4
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x half> undef, i32 5
; CHECK-NEXT: [[CONV_I_5_I:%.*]] = fpext half [[TMP2]] to float
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[CONV_I_5_I]] to i32
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP3]], i32 5
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <16 x half> undef, i32 5
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> undef, half [[TMP0]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[TMP1]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
; CHECK-NEXT: [[VECINS_I_4_I:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 4
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[VECINS_I_5_I:%.*]] = insertelement <8 x i32> [[VECINS_I_4_I]], i32 [[TMP7]], i32 5
; CHECK-NEXT: ret void
;
entry:
Expand Down
116 changes: 20 additions & 96 deletions llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,SSE,SLM
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -basicaa -slp-vectorizer -instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW

define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: @add_sub_v8i32(
Expand Down Expand Up @@ -101,44 +101,11 @@ define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
}

define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_shl_v8i32(
; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; SSE-NEXT: [[AB2:%.*]] = ashr i32 [[A2]], [[B2]]
; SSE-NEXT: [[AB3:%.*]] = ashr i32 [[A3]], [[B3]]
; SSE-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[R3]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; SLM-LABEL: @ashr_shl_v8i32(
; SLM-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; SLM-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; SLM-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; SLM-NEXT: ret <8 x i32> [[R7]]
;
; AVX-LABEL: @ashr_shl_v8i32(
; AVX-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX-NEXT: ret <8 x i32> [[R7]]
;
; AVX512-LABEL: @ashr_shl_v8i32(
; AVX512-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; AVX512-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; AVX512-NEXT: ret <8 x i32> [[R7]]
; CHECK-LABEL: @ashr_shl_v8i32(
; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i32> [[A]], [[B]]
; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x i32> [[R7]]
;
%a0 = extractelement <8 x i32> %a, i32 0
%a1 = extractelement <8 x i32> %a, i32 1
Expand Down Expand Up @@ -184,14 +151,6 @@ define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
; SSE-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; SLM-LABEL: @ashr_shl_v8i32_const(
; SLM-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; SLM-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
; SLM-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
; SLM-NEXT: [[R7:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; SLM-NEXT: ret <8 x i32> [[R7]]
;
; AVX1-LABEL: @ashr_shl_v8i32_const(
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; AVX1-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
Expand Down Expand Up @@ -243,66 +202,31 @@ define <8 x i32> @ashr_lshr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: @ashr_lshr_shl_v8i32(
; SSE-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; SSE-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; SSE-NEXT: [[A2:%.*]] = extractelement <8 x i32> [[A]], i32 2
; SSE-NEXT: [[A3:%.*]] = extractelement <8 x i32> [[A]], i32 3
; SSE-NEXT: [[A4:%.*]] = extractelement <8 x i32> [[A]], i32 4
; SSE-NEXT: [[A5:%.*]] = extractelement <8 x i32> [[A]], i32 5
; SSE-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
; SSE-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; SSE-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
; SSE-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; SSE-NEXT: [[B2:%.*]] = extractelement <8 x i32> [[B]], i32 2
; SSE-NEXT: [[B3:%.*]] = extractelement <8 x i32> [[B]], i32 3
; SSE-NEXT: [[B4:%.*]] = extractelement <8 x i32> [[B]], i32 4
; SSE-NEXT: [[B5:%.*]] = extractelement <8 x i32> [[B]], i32 5
; SSE-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
; SSE-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
; SSE-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; SSE-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; SSE-NEXT: [[AB2:%.*]] = lshr i32 [[A2]], [[B2]]
; SSE-NEXT: [[AB3:%.*]] = lshr i32 [[A3]], [[B3]]
; SSE-NEXT: [[AB4:%.*]] = lshr i32 [[A4]], [[B4]]
; SSE-NEXT: [[AB5:%.*]] = lshr i32 [[A5]], [[B5]]
; SSE-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
; SSE-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
; SSE-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
; SSE-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
; SSE-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[AB2]], i32 2
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[AB3]], i32 3
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[AB4]], i32 4
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[AB5]], i32 5
; SSE-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
; SSE-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
; SSE-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
; SSE-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
; SSE-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
; SSE-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
; SSE-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
; SSE-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
; SSE-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SSE-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SSE-NEXT: ret <8 x i32> [[R7]]
;
; SLM-LABEL: @ashr_lshr_shl_v8i32(
; SLM-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; SLM-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
; SLM-NEXT: [[A6:%.*]] = extractelement <8 x i32> [[A]], i32 6
; SLM-NEXT: [[A7:%.*]] = extractelement <8 x i32> [[A]], i32 7
; SLM-NEXT: [[B0:%.*]] = extractelement <8 x i32> [[B:%.*]], i32 0
; SLM-NEXT: [[B1:%.*]] = extractelement <8 x i32> [[B]], i32 1
; SLM-NEXT: [[B6:%.*]] = extractelement <8 x i32> [[B]], i32 6
; SLM-NEXT: [[B7:%.*]] = extractelement <8 x i32> [[B]], i32 7
; SLM-NEXT: [[AB0:%.*]] = ashr i32 [[A0]], [[B0]]
; SLM-NEXT: [[AB1:%.*]] = ashr i32 [[A1]], [[B1]]
; SLM-NEXT: [[TMP1:%.*]] = lshr <8 x i32> [[A]], [[B]]
; SLM-NEXT: [[AB6:%.*]] = shl i32 [[A6]], [[B6]]
; SLM-NEXT: [[AB7:%.*]] = shl i32 [[A7]], [[B7]]
; SLM-NEXT: [[R0:%.*]] = insertelement <8 x i32> undef, i32 [[AB0]], i32 0
; SLM-NEXT: [[R1:%.*]] = insertelement <8 x i32> [[R0]], i32 [[AB1]], i32 1
; SLM-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2
; SLM-NEXT: [[R2:%.*]] = insertelement <8 x i32> [[R1]], i32 [[TMP2]], i32 2
; SLM-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3
; SLM-NEXT: [[R3:%.*]] = insertelement <8 x i32> [[R2]], i32 [[TMP3]], i32 3
; SLM-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4
; SLM-NEXT: [[R4:%.*]] = insertelement <8 x i32> [[R3]], i32 [[TMP4]], i32 4
; SLM-NEXT: [[TMP5:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5
; SLM-NEXT: [[R5:%.*]] = insertelement <8 x i32> [[R4]], i32 [[TMP5]], i32 5
; SLM-NEXT: [[R6:%.*]] = insertelement <8 x i32> [[R5]], i32 [[AB6]], i32 6
; SLM-NEXT: [[R7:%.*]] = insertelement <8 x i32> [[R6]], i32 [[AB7]], i32 7
; SLM-NEXT: ret <8 x i32> [[R7]]
;
; AVX1-LABEL: @ashr_lshr_shl_v8i32(
; AVX1-NEXT: [[A0:%.*]] = extractelement <8 x i32> [[A:%.*]], i32 0
; AVX1-NEXT: [[A1:%.*]] = extractelement <8 x i32> [[A]], i32 1
Expand Down
Loading