122 changes: 55 additions & 67 deletions llvm/test/Transforms/InstCombine/sub-minmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ define i32 @max_na_b_minux_na(i32 %A, i32 %B) {
; CHECK-LABEL: @max_na_b_minux_na(
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[NOT]], i32 [[B:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[X]]
;
%not = xor i32 %A, -1
%l0 = icmp ult i32 %not, %B
Expand Down Expand Up @@ -85,8 +85,8 @@ define i32 @max_b_na_minus_na(i32 %A, i32 %B) {
; CHECK-LABEL: @max_b_na_minus_na(
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[NOT]], i32 [[B:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[X]]
;
%not = xor i32 %A, -1
%l0 = icmp ugt i32 %not, %B
Expand All @@ -112,8 +112,8 @@ define i32 @na_minus_max_b_na(i32 %A, i32 %B) {
define i32 @max_na_bi_minux_na(i32 %A, i32 %Bi) {
; CHECK-LABEL: @max_na_bi_minux_na(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[BI:%.*]], i32 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[X]]
;
%B = xor i32 %Bi, -1
%not = xor i32 %A, -1
Expand All @@ -139,8 +139,8 @@ define i32 @na_minus_max_na_bi(i32 %A, i32 %Bi) {
define i32 @max_bi_na_minus_na(i32 %A, i32 %Bi) {
; CHECK-LABEL: @max_bi_na_minus_na(
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[BI:%.*]], i32 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[X:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT: ret i32 [[X]]
;
%B = xor i32 %Bi, -1
%not = xor i32 %A, -1
Expand All @@ -166,11 +166,10 @@ define i32 @na_minus_max_bi_na(i32 %A, i32 %Bi) {

define i32 @max_na_bi_minux_na_use(i32 %A, i32 %Bi) {
; CHECK-LABEL: @max_na_bi_minux_na_use(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32
; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 -32)
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP1]]
; CHECK-NEXT: call void @use32(i32 [[TMP2]])
; CHECK-NEXT: ret i32 [[X]]
;
%not = xor i32 %A, -1
Expand All @@ -183,11 +182,10 @@ define i32 @max_na_bi_minux_na_use(i32 %A, i32 %Bi) {

define i32 @na_minus_max_na_bi_use(i32 %A, i32 %Bi) {
; CHECK-LABEL: @na_minus_max_na_bi_use(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A:%.*]], -32
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[A]], i32 -32
; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[A:%.*]], i32 -32)
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[A]]
; CHECK-NEXT: call void @use32(i32 [[TMP2]])
; CHECK-NEXT: ret i32 [[X]]
;
%not = xor i32 %A, -1
Expand All @@ -200,11 +198,10 @@ define i32 @na_minus_max_na_bi_use(i32 %A, i32 %Bi) {

define i32 @max_bi_na_minus_na_use(i32 %A, i32 %Bi) {
; CHECK-LABEL: @max_bi_na_minus_na_use(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP1]]
; CHECK-NEXT: call void @use32(i32 [[TMP2]])
; CHECK-NEXT: ret i32 [[X]]
;
%not = xor i32 %A, -1
Expand All @@ -218,11 +215,10 @@ define i32 @max_bi_na_minus_na_use(i32 %A, i32 %Bi) {

define i32 @na_minus_max_bi_na_use(i32 %A, i32 %Bi) {
; CHECK-LABEL: @na_minus_max_bi_na_use(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[A]]
; CHECK-NEXT: call void @use32(i32 [[TMP2]])
; CHECK-NEXT: ret i32 [[X]]
;
%not = xor i32 %A, -1
Expand All @@ -238,10 +234,9 @@ define i32 @na_minus_max_bi_na_use(i32 %A, i32 %Bi) {
define i32 @max_na_bi_minux_na_use2(i32 %A, i32 %Bi) {
; CHECK-LABEL: @max_na_bi_minux_na_use2(
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31
; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31
; CHECK-NEXT: [[X:%.*]] = sub i32 [[L1]], [[NOT]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[NOT]], i32 31)
; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[NOT]]
; CHECK-NEXT: call void @use32(i32 [[TMP1]])
; CHECK-NEXT: call void @use32(i32 [[NOT]])
; CHECK-NEXT: ret i32 [[X]]
;
Expand All @@ -257,10 +252,9 @@ define i32 @max_na_bi_minux_na_use2(i32 %A, i32 %Bi) {
define i32 @na_minus_max_na_bi_use2(i32 %A, i32 %Bi) {
; CHECK-LABEL: @na_minus_max_na_bi_use2(
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT: [[L0:%.*]] = icmp ult i32 [[NOT]], 31
; CHECK-NEXT: [[L1:%.*]] = select i1 [[L0]], i32 [[NOT]], i32 31
; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[L1]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[NOT]], i32 31)
; CHECK-NEXT: [[X:%.*]] = sub i32 [[NOT]], [[TMP1]]
; CHECK-NEXT: call void @use32(i32 [[TMP1]])
; CHECK-NEXT: call void @use32(i32 [[NOT]])
; CHECK-NEXT: ret i32 [[X]]
;
Expand All @@ -276,11 +270,10 @@ define i32 @na_minus_max_na_bi_use2(i32 %A, i32 %Bi) {
define i32 @max_bi_na_minus_na_use2(i32 %A, i32 %Bi) {
; CHECK-LABEL: @max_bi_na_minus_na_use2(
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP2]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[A]], [[TMP1]]
; CHECK-NEXT: call void @use32(i32 [[TMP2]])
; CHECK-NEXT: call void @use32(i32 [[NOT]])
; CHECK-NEXT: ret i32 [[X]]
;
Expand All @@ -297,11 +290,10 @@ define i32 @max_bi_na_minus_na_use2(i32 %A, i32 %Bi) {
define i32 @na_minus_max_bi_na_use2(i32 %A, i32 %Bi) {
; CHECK-LABEL: @na_minus_max_bi_na_use2(
; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[A:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[BI:%.*]], [[A]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[BI]], i32 [[A]]
; CHECK-NEXT: [[L1:%.*]] = xor i32 [[TMP2]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP2]], [[A]]
; CHECK-NEXT: call void @use32(i32 [[L1]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[BI:%.*]], i32 [[A]])
; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: [[X:%.*]] = sub i32 [[TMP1]], [[A]]
; CHECK-NEXT: call void @use32(i32 [[TMP2]])
; CHECK-NEXT: call void @use32(i32 [[NOT]])
; CHECK-NEXT: ret i32 [[X]]
;
Expand All @@ -317,14 +309,13 @@ define i32 @na_minus_max_bi_na_use2(i32 %A, i32 %Bi) {

define i8 @umin_not_sub(i8 %x, i8 %y) {
; CHECK-LABEL: @umin_not_sub(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]]
; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP2]], [[X]]
; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[TMP2]], [[Y]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[TMP1]], [[X]]
; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[TMP1]], [[Y]]
; CHECK-NEXT: call void @use8(i8 [[SUBX]])
; CHECK-NEXT: call void @use8(i8 [[SUBY]])
; CHECK-NEXT: ret i8 [[MINXY]]
; CHECK-NEXT: ret i8 [[TMP2]]
;
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
Expand All @@ -339,14 +330,13 @@ define i8 @umin_not_sub(i8 %x, i8 %y) {

define i8 @umin_not_sub_rev(i8 %x, i8 %y) {
; CHECK-LABEL: @umin_not_sub_rev(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Y]]
; CHECK-NEXT: [[MINXY:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP2]]
; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[Y]], [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: [[SUBX:%.*]] = sub i8 [[X]], [[TMP1]]
; CHECK-NEXT: [[SUBY:%.*]] = sub i8 [[Y]], [[TMP1]]
; CHECK-NEXT: call void @use8(i8 [[SUBX]])
; CHECK-NEXT: call void @use8(i8 [[SUBY]])
; CHECK-NEXT: ret i8 [[MINXY]]
; CHECK-NEXT: ret i8 [[TMP2]]
;
%nx = xor i8 %x, -1
%ny = xor i8 %y, -1
Expand All @@ -361,15 +351,13 @@ define i8 @umin_not_sub_rev(i8 %x, i8 %y) {

define void @umin3_not_all_ops_extra_uses_invert_subs(i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: @umin3_not_all_ops_extra_uses_invert_subs(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 [[Z]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], [[Y:%.*]]
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[Y]]
; CHECK-NEXT: [[TMP5:%.*]] = xor i8 [[TMP4]], -1
; CHECK-NEXT: [[XMIN:%.*]] = sub i8 [[TMP4]], [[X]]
; CHECK-NEXT: [[YMIN:%.*]] = sub i8 [[TMP4]], [[Y]]
; CHECK-NEXT: [[ZMIN:%.*]] = sub i8 [[TMP4]], [[Z]]
; CHECK-NEXT: call void @use8(i8 [[TMP5]])
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Z:%.*]])
; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.umax.i8(i8 [[Y:%.*]], i8 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[TMP2]], -1
; CHECK-NEXT: [[XMIN:%.*]] = sub i8 [[TMP2]], [[X]]
; CHECK-NEXT: [[YMIN:%.*]] = sub i8 [[TMP2]], [[Y]]
; CHECK-NEXT: [[ZMIN:%.*]] = sub i8 [[TMP2]], [[Z]]
; CHECK-NEXT: call void @use8(i8 [[TMP3]])
; CHECK-NEXT: call void @use8(i8 [[XMIN]])
; CHECK-NEXT: call void @use8(i8 [[YMIN]])
; CHECK-NEXT: call void @use8(i8 [[ZMIN]])
Expand Down
30 changes: 12 additions & 18 deletions llvm/test/Transforms/InstCombine/sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1226,9 +1226,8 @@ define <2 x i32> @test63vec(<2 x i32> %A) {

define i32 @test64(i32 %x) {
; CHECK-LABEL: @test64(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i32 [[TMP2]], 1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255)
; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw i32 [[TMP1]], 1
; CHECK-NEXT: ret i32 [[DOTNEG]]
;
%1 = xor i32 %x, -1
Expand All @@ -1240,9 +1239,8 @@ define i32 @test64(i32 %x) {

define i32 @test65(i32 %x) {
; CHECK-LABEL: @test65(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], -256
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -256
; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP2]], 1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 -256)
; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: ret i32 [[DOTNEG]]
;
%1 = xor i32 %x, -1
Expand All @@ -1254,9 +1252,8 @@ define i32 @test65(i32 %x) {

define i32 @test66(i32 %x) {
; CHECK-LABEL: @test66(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], -101
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 -101
; CHECK-NEXT: [[DOTNEG:%.*]] = add nuw i32 [[TMP2]], 1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 -101)
; CHECK-NEXT: [[DOTNEG:%.*]] = add nuw i32 [[TMP1]], 1
; CHECK-NEXT: ret i32 [[DOTNEG]]
;
%1 = xor i32 %x, -1
Expand All @@ -1268,9 +1265,8 @@ define i32 @test66(i32 %x) {

define i32 @test67(i32 %x) {
; CHECK-LABEL: @test67(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[X:%.*]], 100
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 100
; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP2]], 1
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 100)
; CHECK-NEXT: [[DOTNEG:%.*]] = add i32 [[TMP1]], 1
; CHECK-NEXT: ret i32 [[DOTNEG]]
;
%1 = xor i32 %x, -1
Expand All @@ -1283,9 +1279,8 @@ define i32 @test67(i32 %x) {
; Check splat vectors too
define <2 x i32> @test68(<2 x i32> %x) {
; CHECK-LABEL: @test68(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 255, i32 255>
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 255, i32 255>
; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 255>)
; CHECK-NEXT: [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[DOTNEG]]
;
%1 = xor <2 x i32> %x, <i32 -1, i32 -1>
Expand All @@ -1298,9 +1293,8 @@ define <2 x i32> @test68(<2 x i32> %x) {
; And non-splat constant vectors.
define <2 x i32> @test69(<2 x i32> %x) {
; CHECK-LABEL: @test69(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 255, i32 127>
; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[X]], <2 x i32> <i32 255, i32 127>
; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP2]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 127>)
; CHECK-NEXT: [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 1>
; CHECK-NEXT: ret <2 x i32> [[DOTNEG]]
;
%1 = xor <2 x i32> %x, <i32 -1, i32 -1>
Expand Down
86 changes: 34 additions & 52 deletions llvm/test/Transforms/InstCombine/truncating-saturate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@ declare void @use1(i1)

define i8 @testi16i8(i16 %add) {
; CHECK-LABEL: @testi16i8(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127
; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8
; CHECK-NEXT: ret i8 [[TMP5]]
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128)
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127)
; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: ret i8 [[TMP3]]
;
%sh = lshr i16 %add, 8
%conv.i = trunc i16 %sh to i8
Expand All @@ -29,12 +27,10 @@ define i8 @testi16i8(i16 %add) {

define i32 @testi64i32(i64 %add) {
; CHECK-LABEL: @testi64i32(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i64 [[ADD:%.*]], -2147483648
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[ADD]], i64 -2147483648
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 2147483647
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[TMP2]], i64 2147483647
; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
; CHECK-NEXT: ret i32 [[TMP5]]
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.smax.i64(i64 [[ADD:%.*]], i64 -2147483648)
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.smin.i64(i64 [[TMP1]], i64 2147483647)
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: ret i32 [[TMP3]]
;
%sh = lshr i64 %add, 32
%conv.i = trunc i64 %sh to i32
Expand All @@ -50,12 +46,10 @@ define i32 @testi64i32(i64 %add) {

define i16 @testi32i16i8(i32 %add) {
; CHECK-LABEL: @testi32i16i8(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127
; CHECK-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; CHECK-NEXT: ret i16 [[TMP5]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[ADD:%.*]], i32 -128)
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127)
; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
; CHECK-NEXT: ret i16 [[TMP3]]
;
%a = add i32 %add, 128
%cmp = icmp ult i32 %a, 256
Expand All @@ -68,12 +62,10 @@ define i16 @testi32i16i8(i32 %add) {

define <4 x i16> @testv4i32i16i8(<4 x i32> %add) {
; CHECK-LABEL: @testv4i32i16i8(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[ADD:%.*]], <i32 -128, i32 -128, i32 -128, i32 -128>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[ADD]], <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP2]], <i32 127, i32 127, i32 127, i32 127>
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP2]], <4 x i32> <i32 127, i32 127, i32 127, i32 127>
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[TMP5]]
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
;
%a = add <4 x i32> %add, <i32 128, i32 128, i32 128, i32 128>
%cmp = icmp ult <4 x i32> %a, <i32 256, i32 256, i32 256, i32 256>
Expand All @@ -86,11 +78,9 @@ define <4 x i16> @testv4i32i16i8(<4 x i32> %add) {

define i32 @testi32i32i8(i32 %add) {
; CHECK-LABEL: @testi32i32i8(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[ADD:%.*]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[ADD]], i32 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 127
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 127
; CHECK-NEXT: ret i32 [[TMP4]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[ADD:%.*]], i32 -128)
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 127)
; CHECK-NEXT: ret i32 [[TMP2]]
;
%a = add i32 %add, 128
%cmp = icmp ult i32 %a, 256
Expand All @@ -103,11 +93,9 @@ define i32 @testi32i32i8(i32 %add) {
define i16 @test_truncfirst(i32 %add) {
; CHECK-LABEL: @test_truncfirst(
; CHECK-NEXT: [[T:%.*]] = trunc i32 [[ADD:%.*]] to i16
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[T]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[T]], i16 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127
; CHECK-NEXT: ret i16 [[TMP4]]
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[T]], i16 -128)
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127)
; CHECK-NEXT: ret i16 [[TMP2]]
;
%t = trunc i32 %add to i16
%a = add i16 %t, 128
Expand Down Expand Up @@ -159,12 +147,10 @@ define i32 @testi64i32addsat(i32 %a, i32 %b) {

define <4 x i8> @testv4i16i8(<4 x i16> %add) {
; CHECK-LABEL: @testv4i16i8(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], <i16 -128, i16 -128, i16 -128, i16 -128>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[ADD]], <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i16> [[TMP2]], <i16 127, i16 127, i16 127, i16 127>
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP2]], <4 x i16> <i16 127, i16 127, i16 127, i16 127>
; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i16> [[TMP4]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[TMP5]]
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i16> [[TMP2]] to <4 x i8>
; CHECK-NEXT: ret <4 x i8> [[TMP3]]
;
%sh = lshr <4 x i16> %add, <i16 8, i16 8, i16 8, i16 8>
%conv.i = trunc <4 x i16> %sh to <4 x i8>
Expand Down Expand Up @@ -200,12 +186,10 @@ define <4 x i8> @testv4i16i8add(<4 x i8> %a, <4 x i8> %b) {

define i8 @testi16i8_revcmp(i16 %add) {
; CHECK-LABEL: @testi16i8_revcmp(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127
; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8
; CHECK-NEXT: ret i8 [[TMP5]]
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128)
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127)
; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: ret i8 [[TMP3]]
;
%sh = lshr i16 %add, 8
%conv.i = trunc i16 %sh to i8
Expand All @@ -221,12 +205,10 @@ define i8 @testi16i8_revcmp(i16 %add) {

define i8 @testi16i8_revselect(i16 %add) {
; CHECK-LABEL: @testi16i8_revselect(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i16 [[ADD:%.*]], -128
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[ADD]], i16 -128
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 127
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i16 [[TMP2]], i16 127
; CHECK-NEXT: [[TMP5:%.*]] = trunc i16 [[TMP4]] to i8
; CHECK-NEXT: ret i8 [[TMP5]]
; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.smax.i16(i16 [[ADD:%.*]], i16 -128)
; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.smin.i16(i16 [[TMP1]], i16 127)
; CHECK-NEXT: [[TMP3:%.*]] = trunc i16 [[TMP2]] to i8
; CHECK-NEXT: ret i8 [[TMP3]]
;
%sh = lshr i16 %add, 8
%conv.i = trunc i16 %sh to i8
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/InstCombine/umax-icmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ define i1 @ne_umax1(i32 %x, i32 %y) {

define i1 @ne_umax2(i32 %x, i32 %y) {
; CHECK-LABEL: @ne_umax2(
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%cmp1 = icmp ugt i32 %y, %x
%sel = select i1 %cmp1, i32 %y, i32 %x
Expand Down Expand Up @@ -166,8 +166,8 @@ define i1 @ne_umax3(i32 %a, i32 %y) {
define i1 @ne_umax4(i32 %a, i32 %y) {
; CHECK-LABEL: @ne_umax4(
; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%x = add i32 %a, 3 ; thwart complexity-based canonicalization
%cmp1 = icmp ugt i32 %y, %x
Expand All @@ -193,8 +193,8 @@ define i1 @ugt_umax1(i32 %x, i32 %y) {

define i1 @ugt_umax2(i32 %x, i32 %y) {
; CHECK-LABEL: @ugt_umax2(
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%cmp1 = icmp ugt i32 %y, %x
%sel = select i1 %cmp1, i32 %y, i32 %x
Expand Down Expand Up @@ -222,8 +222,8 @@ define i1 @ugt_umax3(i32 %a, i32 %y) {
define i1 @ugt_umax4(i32 %a, i32 %y) {
; CHECK-LABEL: @ugt_umax4(
; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%x = add i32 %a, 3 ; thwart complexity-based canonicalization
%cmp1 = icmp ugt i32 %y, %x
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Transforms/InstCombine/umin-icmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,8 @@ define i1 @ne_umin1(i32 %x, i32 %y) {

define i1 @ne_umin2(i32 %x, i32 %y) {
; CHECK-LABEL: @ne_umin2(
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%cmp1 = icmp ult i32 %y, %x
%sel = select i1 %cmp1, i32 %y, i32 %x
Expand Down Expand Up @@ -166,8 +166,8 @@ define i1 @ne_umin3(i32 %a, i32 %y) {
define i1 @ne_umin4(i32 %a, i32 %y) {
; CHECK-LABEL: @ne_umin4(
; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%x = add i32 %a, 3 ; thwart complexity-based canonicalization
%cmp1 = icmp ult i32 %y, %x
Expand All @@ -193,8 +193,8 @@ define i1 @ult_umin1(i32 %x, i32 %y) {

define i1 @ult_umin2(i32 %x, i32 %y) {
; CHECK-LABEL: @ult_umin2(
; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%cmp1 = icmp ult i32 %y, %x
%sel = select i1 %cmp1, i32 %y, i32 %x
Expand Down Expand Up @@ -222,8 +222,8 @@ define i1 @ult_umin3(i32 %a, i32 %y) {
define i1 @ult_umin4(i32 %a, i32 %y) {
; CHECK-LABEL: @ult_umin4(
; CHECK-NEXT: [[X:%.*]] = add i32 [[A:%.*]], 3
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP1]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
; CHECK-NEXT: ret i1 [[CMP2]]
;
%x = add i32 %a, 3 ; thwart complexity-based canonicalization
%cmp1 = icmp ult i32 %y, %x
Expand Down
23 changes: 10 additions & 13 deletions llvm/test/Transforms/InstCombine/with_overflow.ll
Original file line number Diff line number Diff line change
Expand Up @@ -570,11 +570,10 @@ define { i8, i1 } @umul_always_overflow(i8 %x) nounwind {

define { i8, i1 } @sadd_always_overflow(i8 %x) nounwind {
; CHECK-LABEL: @sadd_always_overflow(
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 100
; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 100
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[Y]], 28
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 100)
; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[TMP1]], 28
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
; CHECK-NEXT: ret { i8, i1 } [[TMP2]]
;
%c = icmp sgt i8 %x, 100
%y = select i1 %c, i8 %x, i8 100
Expand All @@ -584,11 +583,10 @@ define { i8, i1 } @sadd_always_overflow(i8 %x) nounwind {

define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind {
; CHECK-LABEL: @ssub_always_overflow(
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 29
; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 29
; CHECK-NEXT: [[A:%.*]] = sub nuw i8 -100, [[Y]]
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
; CHECK-NEXT: ret { i8, i1 } [[TMP1]]
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 29)
; CHECK-NEXT: [[A:%.*]] = sub nuw i8 -100, [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { i8, i1 } { i8 undef, i1 true }, i8 [[A]], 0
; CHECK-NEXT: ret { i8, i1 } [[TMP2]]
;
%c = icmp sgt i8 %x, 29
%y = select i1 %c, i8 %x, i8 29
Expand All @@ -598,9 +596,8 @@ define { i8, i1 } @ssub_always_overflow(i8 %x) nounwind {

define { i8, i1 } @smul_always_overflow(i8 %x) nounwind {
; CHECK-LABEL: @smul_always_overflow(
; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 [[X:%.*]], 100
; CHECK-NEXT: [[Y:%.*]] = select i1 [[C]], i8 [[X]], i8 100
; CHECK-NEXT: [[A:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[Y]], i8 2)
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 100)
; CHECK-NEXT: [[A:%.*]] = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[TMP1]], i8 2)
; CHECK-NEXT: ret { i8, i1 } [[A]]
;
%c = icmp sgt i8 %x, 100
Expand Down
87 changes: 35 additions & 52 deletions llvm/test/Transforms/InstCombine/xor.ll
Original file line number Diff line number Diff line change
Expand Up @@ -665,9 +665,8 @@ define i8 @xor_and_not_uses(i8 %x, i8* %p) {

define i32 @test39(i32 %x) {
; CHECK-LABEL: @test39(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[X:%.*]], 255
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[X]], i32 255
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255)
; CHECK-NEXT: ret i32 [[TMP1]]
;
%1 = xor i32 %x, -1
%2 = icmp sgt i32 %1, -256
Expand All @@ -679,9 +678,8 @@ define i32 @test39(i32 %x) {
define i32 @test40(i32 %x, i32 %y) {
; CHECK-LABEL: @test40(
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[RES]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[TMP2]]
;
%notx = xor i32 %x, -1
%cmp1 = icmp sgt i32 %notx, %y
Expand All @@ -693,9 +691,8 @@ define i32 @test40(i32 %x, i32 %y) {
define i32 @test41(i32 %x, i32 %y) {
; CHECK-LABEL: @test41(
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[RES]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[TMP2]]
;
%notx = xor i32 %x, -1
%cmp1 = icmp slt i32 %notx, %y
Expand All @@ -707,9 +704,8 @@ define i32 @test41(i32 %x, i32 %y) {
define i32 @test42(i32 %x, i32 %y) {
; CHECK-LABEL: @test42(
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[RES]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[TMP2]]
;
%notx = xor i32 %x, -1
%cmp1 = icmp ugt i32 %notx, %y
Expand All @@ -721,9 +717,8 @@ define i32 @test42(i32 %x, i32 %y) {
define i32 @test43(i32 %x, i32 %y) {
; CHECK-LABEL: @test43(
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], -1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[X]], i32 [[TMP1]]
; CHECK-NEXT: ret i32 [[RES]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[TMP2]]
;
%notx = xor i32 %x, -1
%cmp1 = icmp ult i32 %notx, %y
Expand All @@ -735,9 +730,8 @@ define i32 @test43(i32 %x, i32 %y) {
define i32 @test44(i32 %x, i32 %y) {
; CHECK-LABEL: @test44(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -4, [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], [[X:%.*]]
; CHECK-NEXT: [[RES:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 [[X]]
; CHECK-NEXT: ret i32 [[RES]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[TMP1]])
; CHECK-NEXT: ret i32 [[TMP2]]
;
%z = add i32 %y, 3 ; thwart complexity-based canonicalization
%notx = xor i32 %x, -1
Expand All @@ -749,9 +743,8 @@ define i32 @test44(i32 %x, i32 %y) {

define i32 @test45(i32 %x, i32 %y) {
; CHECK-LABEL: @test45(
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 [[Y]], i32 [[X]]
; CHECK-NEXT: ret i32 [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[Y:%.*]], i32 [[X:%.*]])
; CHECK-NEXT: ret i32 [[TMP1]]
;
%z = xor i32 %y, -1
%notx = xor i32 %x, -1
Expand All @@ -764,9 +757,8 @@ define i32 @test45(i32 %x, i32 %y) {
; Check that we work with splat vectors also.
define <4 x i32> @test46(<4 x i32> %x) {
; CHECK-LABEL: @test46(
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[X]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
%2 = icmp sgt <4 x i32> %1, <i32 -256, i32 -256, i32 -256, i32 -256>
Expand All @@ -779,10 +771,9 @@ define <4 x i32> @test46(<4 x i32> %x) {
define i32 @test47(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @test47(
; CHECK-NEXT: [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[NOTX]], [[Y:%.*]]
; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[CMP1]], i32 [[NOTX]], i32 [[Y]]
; CHECK-NEXT: [[UMIN:%.*]] = xor i32 [[UMAX]], -1
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[UMAX]], [[Z:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[NOTX]], i32 [[Y:%.*]])
; CHECK-NEXT: [[UMIN:%.*]] = xor i32 [[TMP1]], -1
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[ADD]], [[UMIN]]
; CHECK-NEXT: ret i32 [[RES]]
;
Expand All @@ -798,9 +789,8 @@ define i32 @test47(i32 %x, i32 %y, i32 %z) {
define i32 @test48(i32 %x) {
; CHECK-LABEL: @test48(
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP1]], -1
; CHECK-NEXT: [[D:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 -1
; CHECK-NEXT: ret i32 [[D]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 -1)
; CHECK-NEXT: ret i32 [[TMP2]]
;
%a = sub i32 -2, %x
%b = icmp sgt i32 %a, 0
Expand All @@ -812,9 +802,8 @@ define i32 @test48(i32 %x) {
define <2 x i32> @test48vec(<2 x i32> %x) {
; CHECK-LABEL: @test48vec(
; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], <i32 -1, i32 -1>
; CHECK-NEXT: [[D:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>
; CHECK-NEXT: ret <2 x i32> [[D]]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%a = sub <2 x i32> <i32 -2, i32 -2>, %x
%b = icmp sgt <2 x i32> %a, zeroinitializer
Expand All @@ -826,9 +815,8 @@ define <2 x i32> @test48vec(<2 x i32> %x) {
define i32 @test49(i32 %x) {
; CHECK-LABEL: @test49(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 1, [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 0
; CHECK-NEXT: [[D:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 0
; CHECK-NEXT: ret i32 [[D]]
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 0)
; CHECK-NEXT: ret i32 [[TMP2]]
;
%a = add i32 %x, -2
%b = icmp slt i32 %a, -1
Expand All @@ -840,9 +828,8 @@ define i32 @test49(i32 %x) {
define <2 x i32> @test49vec(<2 x i32> %x) {
; CHECK-LABEL: @test49vec(
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <2 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[D:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
; CHECK-NEXT: ret <2 x i32> [[D]]
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> zeroinitializer)
; CHECK-NEXT: ret <2 x i32> [[TMP2]]
;
%a = add <2 x i32> %x, <i32 -2, i32 -2>
%b = icmp slt <2 x i32> %a, <i32 -1, i32 -1>
Expand All @@ -855,9 +842,8 @@ define i32 @test50(i32 %x, i32 %y) {
; CHECK-LABEL: @test50(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 1, [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[E]]
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%a = add i32 %x, -2
%b = sub i32 -2, %y
Expand All @@ -871,9 +857,8 @@ define <2 x i32> @test50vec(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @test50vec(
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1>
; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]
; CHECK-NEXT: ret <2 x i32> [[E]]
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%a = add <2 x i32> %x, <i32 -2, i32 -2>
%b = sub <2 x i32> <i32 -2, i32 -2>, %y
Expand All @@ -887,9 +872,8 @@ define i32 @test51(i32 %x, i32 %y) {
; CHECK-LABEL: @test51(
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 -3, [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[Y:%.*]], -3
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[E:%.*]] = select i1 [[TMP3]], i32 [[TMP1]], i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[E]]
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
%a = add i32 %x, 2
%b = sub i32 2, %y
Expand All @@ -903,9 +887,8 @@ define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: @test51vec(
; CHECK-NEXT: [[TMP1:%.*]] = sub <2 x i32> <i32 -3, i32 -3>, [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], <i32 -3, i32 -3>
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[E:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP1]], <2 x i32> [[TMP2]]
; CHECK-NEXT: ret <2 x i32> [[E]]
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
;
%a = add <2 x i32> %x, <i32 2, i32 2>
%b = sub <2 x i32> <i32 2, i32 2>, %y
Expand Down
48 changes: 22 additions & 26 deletions llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ define i32 @reduction_sum_single(i32* noalias nocapture %A) {
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD]])
; CHECK-NEXT: [[TMP3]] = add i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
Expand Down Expand Up @@ -76,8 +76,8 @@ define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B)
; CHECK-NEXT: [[TMP10]] = add i32 [[TMP9]], [[TMP8]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
Expand Down Expand Up @@ -442,34 +442,32 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[TMP5:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[RESULT_08]], [[L0]]
; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]]
; CHECK-NEXT: [[TMP5]] = call i32 @llvm.smin.i32(i32 [[RESULT_08]], i32 [[L0]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[TMP5]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down Expand Up @@ -499,34 +497,32 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 1000, [[ENTRY]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[TMP5:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
; CHECK-NEXT: [[L0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[C0:%.*]] = icmp ugt i32 [[RESULT_08]], [[L0]]
; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[L0]]
; CHECK-NEXT: [[TMP5]] = call i32 @llvm.umax.i32(i32 [[RESULT_08]], i32 [[L0]])
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], 257
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ [[TMP5]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down
38 changes: 17 additions & 21 deletions llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,43 +16,39 @@ define arm_aapcs_vfpcc i32 @minmaxval4(i32* nocapture readonly %x, i32* nocaptur
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[VEC_PHI1]]
; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]])
; CHECK-NEXT: [[TMP3]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[N]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 2147483647, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ -2147483648, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 2147483647, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ -2147483648, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: [[MAX_0_LCSSA:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[COND:%.*]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[MIN_0_LCSSA:%.*]] = phi i32 [ 2147483647, [[ENTRY]] ], [ [[COND9:%.*]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[MAX_0_LCSSA:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[TMP8:%.*]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[MIN_0_LCSSA:%.*]] = phi i32 [ 2147483647, [[ENTRY]] ], [ [[TMP9:%.*]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: store i32 [[MIN_0_LCSSA]], i32* [[MINP:%.*]], align 4
; CHECK-NEXT: ret i32 [[MAX_0_LCSSA]]
; CHECK: for.body:
; CHECK-NEXT: [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MIN_028:%.*]] = phi i32 [ [[COND9]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MAX_027:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MIN_028:%.*]] = phi i32 [ [[TMP9]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[MAX_027:%.*]] = phi i32 [ [[TMP8]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_029]]
; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], [[MAX_027]]
; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[TMP9]], i32 [[MAX_027]]
; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP9]], [[MIN_028]]
; CHECK-NEXT: [[COND9]] = select i1 [[CMP4]], i32 [[TMP9]], i32 [[MIN_028]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP8]] = call i32 @llvm.smax.i32(i32 [[TMP7]], i32 [[MAX_027]])
; CHECK-NEXT: [[TMP9]] = call i32 @llvm.smin.i32(i32 [[TMP7]], i32 [[MIN_028]])
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_029]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
Expand Down
1,092 changes: 541 additions & 551 deletions llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ do.end: ; preds = %cond.end
;CHECK: example1
;CHECK: load <4 x i32>
;CHECK-NEXT: shufflevector <4 x i32>
;CHECK: select <4 x i1>
;CHECK: call <4 x i32> @llvm.smax.v4i32
;CHECK: store <4 x i32>
;CHECK: ret
define void @example1(i32* nocapture %a, i32 %n, i32 %wsize) nounwind uwtable ssp {
Expand Down
109 changes: 49 additions & 60 deletions llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll

Large diffs are not rendered by default.

22 changes: 10 additions & 12 deletions llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1173,7 +1173,7 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
Expand Down Expand Up @@ -1216,20 +1216,19 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP25]], [[VEC_PHI]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]]
; CHECK-NEXT: [[TMP26]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down Expand Up @@ -1261,7 +1260,7 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
Expand Down Expand Up @@ -1304,20 +1303,19 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt i32 [[TMP25]], [[VEC_PHI]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]]
; CHECK-NEXT: [[TMP26]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -657,24 +657,23 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP2]], i32 [[VEC_PHI]]
; CHECK-NEXT: [[TMP3]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down Expand Up @@ -705,24 +704,23 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP2]], i32 [[VEC_PHI]]
; CHECK-NEXT: [[TMP3]] = call i32 @llvm.umax.i32(i32 [[TMP2]], i32 [[VEC_PHI]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down
30 changes: 14 additions & 16 deletions llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
Expand Down Expand Up @@ -875,22 +875,21 @@ define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP23]]
; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
; CHECK-NEXT: [[TMP26]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP26]])
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP25]])
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down Expand Up @@ -921,7 +920,7 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
Expand Down Expand Up @@ -962,22 +961,21 @@ define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP24:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[TMP23]]
; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]]
; CHECK-NEXT: [[TMP26]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP25]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
; CHECK-NEXT: [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP26]])
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP25]])
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
Expand Down
26 changes: 12 additions & 14 deletions llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,13 @@ define void @arm_mult_q15(i16* %pSrcA, i16* %pSrcB, i16 * noalias %pDst, i32 %bl
; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i16> [[WIDE_LOAD16]] to <8 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <8 x i32> [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[TMP5]], <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP5]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>
; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16>
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[NEXT_GEP14]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP5]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
; CHECK-NEXT: [[TMP7:%.*]] = trunc <8 x i32> [[TMP6]] to <8 x i16>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[NEXT_GEP14]] to <8 x i16>*
; CHECK-NEXT: store <8 x i16> [[TMP7]], <8 x i16>* [[TMP8]], align 2
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[BLOCKSIZE]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END]], label [[WHILE_BODY_PREHEADER17]]
Expand All @@ -59,16 +58,15 @@ define void @arm_mult_q15(i16* %pSrcA, i16* %pSrcB, i16 * noalias %pDst, i32 %bl
; CHECK-NEXT: [[PDST_ADDR_05:%.*]] = phi i16* [ [[INCDEC_PTR4:%.*]], [[WHILE_BODY]] ], [ [[PDST_ADDR_05_PH]], [[WHILE_BODY_PREHEADER17]] ]
; CHECK-NEXT: [[PSRCB_ADDR_04:%.*]] = phi i16* [ [[INCDEC_PTR1:%.*]], [[WHILE_BODY]] ], [ [[PSRCB_ADDR_04_PH]], [[WHILE_BODY_PREHEADER17]] ]
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i16, i16* [[PSRCA_ADDR_06]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[PSRCA_ADDR_06]], align 2
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP11]] to i32
; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[PSRCA_ADDR_06]], align 2
; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32
; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i16, i16* [[PSRCB_ADDR_04]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[PSRCB_ADDR_04]], align 2
; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP12]] to i32
; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[PSRCB_ADDR_04]], align 2
; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP11]] to i32
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]]
; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[MUL]], 15
; CHECK-NEXT: [[TMP13:%.*]] = icmp slt i32 [[SHR]], 32767
; CHECK-NEXT: [[RETVAL_1_I:%.*]] = select i1 [[TMP13]], i32 [[SHR]], i32 32767
; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[RETVAL_1_I]] to i16
; CHECK-NEXT: [[TMP12:%.*]] = tail call i32 @llvm.smin.i32(i32 [[SHR]], i32 32767) #[[ATTR2:[0-9]+]]
; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[TMP12]] to i16
; CHECK-NEXT: [[INCDEC_PTR4]] = getelementptr inbounds i16, i16* [[PDST_ADDR_05]], i32 1
; CHECK-NEXT: store i16 [[CONV3]], i16* [[PDST_ADDR_05]], align 2
; CHECK-NEXT: [[DEC]] = add i32 [[BLKCNT_07]], -1
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/Transforms/PhaseOrdering/X86/vector-math.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ define <2 x i64> @max_v4i32(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @max_v4i32(
; CHECK-NEXT: [[T0_I_I:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>
; CHECK-NEXT: [[T1_I_I:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp sgt <4 x i32> [[T0_I_I]], [[T1_I_I]]
; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[CMP_I_I]], <4 x i32> [[T0_I_I]], <4 x i32> [[T1_I_I]]
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[T0_I_I]], <4 x i32> [[T1_I_I]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
;
Expand Down
11 changes: 1 addition & 10 deletions llvm/test/Transforms/PhaseOrdering/min-max-abs-cse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,9 @@

; sub (smax a,b), (smax a,b) --> 0

; FIXME: We should canonicalize min/max to a form
; where the cmp operands match the select operands.

define i8 @smax_nsw(i8 %a, i8 %b) {
; CHECK-LABEL: @smax_nsw(
; CHECK-NEXT: [[SUB:%.*]] = sub nsw i8 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i8 [[A]], [[B]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[SUB]], 0
; CHECK-NEXT: [[M1:%.*]] = select i1 [[CMP1]], i8 0, i8 [[SUB]]
; CHECK-NEXT: [[M2:%.*]] = select i1 [[CMP2]], i8 [[SUB]], i8 0
; CHECK-NEXT: [[R:%.*]] = sub nsw i8 [[M2]], [[M1]]
; CHECK-NEXT: ret i8 [[R]]
; CHECK-NEXT: ret i8 0
;
%sub = sub nsw i8 %a, %b
%cmp1 = icmp slt i8 %a, %b
Expand Down
16 changes: 7 additions & 9 deletions llvm/test/Transforms/PhaseOrdering/minmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,13 @@ declare void @use(i8, i8, i8, i8)
define void @cmyk(i8 %r, i8 %g, i8 %b) {
; CHECK-LABEL: @cmyk(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i8 [[R:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i8 [[R]], i8 [[B]]
; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i8 [[TMP1]], [[G:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i8 [[TMP1]], i8 [[G]]
; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], -1
; CHECK-NEXT: [[SUB31:%.*]] = sub i8 [[TMP3]], [[R]]
; CHECK-NEXT: [[SUB35:%.*]] = sub i8 [[TMP3]], [[G]]
; CHECK-NEXT: [[SUB39:%.*]] = sub i8 [[TMP3]], [[B]]
; CHECK-NEXT: call void @use(i8 [[SUB31]], i8 [[SUB35]], i8 [[SUB39]], i8 [[TMP4]])
; CHECK-NEXT: [[TMP0:%.*]] = call i8 @llvm.smax.i8(i8 [[R:%.*]], i8 [[G:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[B:%.*]], i8 [[TMP0]])
; CHECK-NEXT: [[K_0:%.*]] = xor i8 [[TMP1]], -1
; CHECK-NEXT: [[SUB31:%.*]] = sub i8 [[TMP1]], [[R]]
; CHECK-NEXT: [[SUB35:%.*]] = sub i8 [[TMP1]], [[G]]
; CHECK-NEXT: [[SUB39:%.*]] = sub i8 [[TMP1]], [[B]]
; CHECK-NEXT: call void @use(i8 [[SUB31]], i8 [[SUB35]], i8 [[SUB39]], i8 [[K_0]])
; CHECK-NEXT: ret void
;
entry:
Expand Down
12 changes: 5 additions & 7 deletions llvm/test/Transforms/PhaseOrdering/pr36760.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
define i64 @PR36760(i64 %a) {
; CHECK-LABEL: @PR36760(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i64 [[A:%.*]], 0
; CHECK-NEXT: [[DOTA:%.*]] = select i1 [[TMP0]], i64 [[A]], i64 0
; CHECK-NEXT: ret i64 [[DOTA]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.smax.i64(i64 [[A:%.*]], i64 0)
; CHECK-NEXT: ret i64 [[TMP0]]
;
entry:
%retval = alloca i64, align 8
Expand Down Expand Up @@ -37,10 +36,9 @@ return:
define i64 @PR36760_2(i64 %a) #0 {
; CHECK-LABEL: @PR36760_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i64 [[A:%.*]], -1
; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[A]], i64 -1
; CHECK-NEXT: [[RETVAL_0:%.*]] = xor i64 [[TMP1]], -1
; CHECK-NEXT: ret i64 [[RETVAL_0]]
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.smin.i64(i64 [[A:%.*]], i64 -1)
; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[TMP0]], -1
; CHECK-NEXT: ret i64 [[TMP1]]
;
entry:
%retval = alloca i64, align 8
Expand Down
16 changes: 7 additions & 9 deletions llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,9 @@ define void @store_i32(i32* nocapture %0, i32 %1, i32 %2) {
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = mul <4 x i32> [[TMP5]], [[SHUFFLE]]
; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP9:%.*]] = icmp ult <4 x i32> [[TMP8]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP11]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* [[TMP10]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: ret void
;
%4 = load i32, i32* %0, align 4, !tbaa !2
Expand Down Expand Up @@ -58,11 +57,10 @@ define void @store_i8(i8* nocapture %0, i32 %1, i32 %2) {
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i32> [[SHUFFLE]], [[TMP6]]
; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>
; CHECK-NEXT: [[TMP12:%.*]] = trunc <4 x i32> [[TMP11]] to <4 x i8>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; CHECK-NEXT: store <4 x i8> [[TMP12]], <4 x i8>* [[TMP13]], align 1, !tbaa [[TBAA4]]
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i8>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP0]] to <4 x i8>*
; CHECK-NEXT: store <4 x i8> [[TMP11]], <4 x i8>* [[TMP12]], align 1, !tbaa [[TBAA4]]
; CHECK-NEXT: ret void
;
%4 = load i8, i8* %0, align 1, !tbaa !6
Expand Down