159 changes: 121 additions & 38 deletions llvm/test/Transforms/InstCombine/X86/x86-sse2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define double @test_sqrt_sd_0(double %a) {
; CHECK-LABEL: @test_sqrt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double %a)
; CHECK-NEXT: [[TMP1:%.*]] = call double @llvm.sqrt.f64(double [[A:%.*]])
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
Expand All @@ -27,7 +27,7 @@ define double @test_sqrt_sd_1(double %a) {

define double @test_add_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_add_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fadd double %a, %b
; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
Expand All @@ -52,9 +52,20 @@ define double @test_add_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse2.add.sd: %a is inserted into lane 0 of a
; zero vector, that one vector is passed as both call operands, and only lane 0
; of the result is extracted. The expected output is a single scalar fadd of %a
; with itself, with no vector instructions left.
define double @test_add_sd_2(double %a) {
; CHECK-LABEL: @test_add_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define double @test_sub_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_sub_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b
; CHECK-NEXT: [[TMP1:%.*]] = fsub double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
Expand All @@ -79,9 +90,20 @@ define double @test_sub_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse2.sub.sd: %a is inserted into lane 0 of a
; zero vector, that one vector is passed as both call operands, and only lane 0
; of the result is extracted. The expected output is a single scalar fsub of %a
; from itself; the expectations keep the fsub rather than a folded constant.
define double @test_sub_sd_2(double %a) {
; CHECK-LABEL: @test_sub_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fsub double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define double @test_mul_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_mul_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b
; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
Expand All @@ -106,9 +128,20 @@ define double @test_mul_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse2.mul.sd: %a is inserted into lane 0 of a
; zero vector, that one vector is passed as both call operands, and only lane 0
; of the result is extracted. The expected output is a single scalar fmul of %a
; with itself, with no vector instructions left.
define double @test_mul_sd_2(double %a) {
; CHECK-LABEL: @test_mul_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define double @test_div_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_div_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double %a, %b
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> undef, double %a, i32 0
Expand All @@ -133,9 +166,20 @@ define double @test_div_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse2.div.sd: %a is inserted into lane 0 of a
; zero vector, that one vector is passed as both call operands, and only lane 0
; of the result is extracted. The expected output is a single scalar fdiv of %a
; by itself; the expectations keep the fdiv rather than a folded constant.
define double @test_div_sd_2(double %a) {
; CHECK-LABEL: @test_div_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv double [[A:%.*]], [[A]]
; CHECK-NEXT: ret double [[TMP1]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_min_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %b)
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
Expand All @@ -145,8 +189,8 @@ define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {

define double @test_min_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_min_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
Expand All @@ -173,9 +217,22 @@ define double @test_min_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse2.min.sd: %a is inserted into lane 0 of a
; zero vector, that one vector is passed as both call operands, and only lane 0
; of the result is extracted. The expected output still contains the intrinsic
; call with the same value used for both operands. The only change is the base
; constant of the insertelement, whose lane 0 (the lane overwritten by %a)
; becomes undef while lane 1 stays 0.0.
define double @test_min_sd_2(double %a) {
; CHECK-LABEL: @test_min_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_max_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %b)
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
Expand All @@ -185,8 +242,8 @@ define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {

define double @test_max_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_max_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
Expand All @@ -213,9 +270,22 @@ define double @test_max_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse2.max.sd: %a is inserted into lane 0 of a
; zero vector, that one vector is passed as both call operands, and only lane 0
; of the result is extracted. The expected output still contains the intrinsic
; call with the same value used for both operands. The only change is the base
; constant of the insertelement, whose lane 0 (the lane overwritten by %a)
; becomes undef while lane 1 stays 0.0.
define double @test_max_sd_2(double %a) {
; CHECK-LABEL: @test_max_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %1)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @test_cmp_sd(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %b, i8 0)
; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i8 0)
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
Expand All @@ -225,8 +295,8 @@ define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {

define double @test_cmp_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_cmp_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i8 0)
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
; CHECK-NEXT: ret double [[TMP4]]
Expand All @@ -253,10 +323,23 @@ define double @test_cmp_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse2.cmp.sd with immediate i8 3: %a is
; inserted into lane 0 of a zero vector, that one vector is passed as both
; vector operands, and only lane 0 of the result is extracted. The expected
; output still contains the intrinsic call with the same value for both vector
; operands and the i8 3 immediate unchanged. The only change is the base
; constant of the insertelement, whose lane 0 (the lane overwritten by %a)
; becomes undef while lane 1 stays 0.0.
define double @test_cmp_sd_2(double %a) {
; CHECK-LABEL: @test_cmp_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP1]], i8 3)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %1, i8 3)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define i32 @test_comieq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comieq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -270,8 +353,8 @@ define i32 @test_comieq_sd_0(double %a, double %b) {

define i32 @test_comige_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comige_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -285,8 +368,8 @@ define i32 @test_comige_sd_0(double %a, double %b) {

define i32 @test_comigt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comigt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -300,8 +383,8 @@ define i32 @test_comigt_sd_0(double %a, double %b) {

define i32 @test_comile_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comile_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -315,8 +398,8 @@ define i32 @test_comile_sd_0(double %a, double %b) {

define i32 @test_comilt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comilt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -330,8 +413,8 @@ define i32 @test_comilt_sd_0(double %a, double %b) {

define i32 @test_comineq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_comineq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -345,8 +428,8 @@ define i32 @test_comineq_sd_0(double %a, double %b) {

define i32 @test_ucomieq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomieq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -360,8 +443,8 @@ define i32 @test_ucomieq_sd_0(double %a, double %b) {

define i32 @test_ucomige_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomige_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -375,8 +458,8 @@ define i32 @test_ucomige_sd_0(double %a, double %b) {

define i32 @test_ucomigt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomigt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -390,8 +473,8 @@ define i32 @test_ucomigt_sd_0(double %a, double %b) {

define i32 @test_ucomile_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomile_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -405,8 +488,8 @@ define i32 @test_ucomile_sd_0(double %a, double %b) {

define i32 @test_ucomilt_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomilt_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand All @@ -420,8 +503,8 @@ define i32 @test_ucomilt_sd_0(double %a, double %b) {

define i32 @test_ucomineq_sd_0(double %a, double %b) {
; CHECK-LABEL: @test_ucomineq_sd_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/Transforms/InstCombine/X86/x86-sse41.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ define double @test_round_sd_1(double %a, double %b) {
ret double %6
}

; Same-operand case for @llvm.x86.sse41.round.sd with i32 10 as the third
; operand: %a is inserted into lane 0 of a zero vector, that one vector is
; passed as both vector operands, and only lane 0 of the result is extracted.
; The expected output still contains the intrinsic call, but its first vector
; operand is replaced by undef and its second is built by inserting %a into an
; undef vector instead of a zero vector. The i32 10 operand is unchanged.
define double @test_round_sd_2(double %a) {
; CHECK-LABEL: @test_round_sd_2(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT: ret double [[TMP3]]
;
%1 = insertelement <2 x double> zeroinitializer, double %a, i32 0
%2 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %1, i32 10)
%3 = extractelement <2 x double> %2, i32 0
ret double %3
}

define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_round_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> [[B:%.*]], i32 10)
Expand Down Expand Up @@ -94,5 +107,18 @@ define float @test_round_ss_2(float %a, float %b) {
ret float %r
}

; Same-operand case for @llvm.x86.sse41.round.ss with i32 10 as the third
; operand: %a is inserted into lane 0 of a zero <4 x float> vector, that one
; vector is passed as both vector operands, and only lane 0 of the result is
; extracted. The expected output still contains the intrinsic call, but its
; first vector operand is replaced by undef and its second is built by
; inserting %a into an undef vector instead of a zero vector. The i32 10
; operand is unchanged.
define float @test_round_ss_3(float %a) {
; CHECK-LABEL: @test_round_ss_3(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
%1 = insertelement <4 x float> zeroinitializer, float %a, i32 0
%2 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %1, <4 x float> %1, i32 10)
%3 = extractelement <4 x float> %2, i32 0
ret float %3
}

; Declarations of the SSE4.1 round intrinsics called by the tests in this file.
; Each takes two vectors of the same type plus an i32 operand and returns a
; vector of that type; both are declared nounwind readnone.
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone