299 changes: 299 additions & 0 deletions clang/test/Frontend/fixed_point_conversions.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,15 @@ _Sat unsigned long _Accum sat_ula;
// Saturating fixed-point globals (the rest of the list begins above this view).
_Sat short _Fract sat_sf;
_Sat _Fract sat_f;
_Sat long _Fract sat_lf;
_Sat unsigned _Fract sat_uf;

// Integer operands for int <-> fixed-point conversion tests.
short s;
int i;
unsigned int ui;

// Floating-point operands for float <-> fixed-point conversion tests.
float fl;
double d;

// CHECK-LABEL: @fix_same1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4
Expand Down Expand Up @@ -695,3 +699,298 @@ void int_sat3() {
void int_sat4() {
  // Saturating conversion from unsigned int to _Sat unsigned short _Accum.
  sat_usa = ui;
}


// CHECK-LABEL: @float_fix1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02
// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i16
// CHECK-NEXT: store i16 [[TMP2]], i16* @sa, align 2
// CHECK-NEXT: ret void
//
void float_fix1() {
  // float -> short _Accum: scale by 2^7, then fptosi into the i16 container.
  sa = fl;
}

// CHECK-LABEL: @float_fix2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04
// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i32
// CHECK-NEXT: store i32 [[TMP2]], i32* @a, align 4
// CHECK-NEXT: ret void
//
void float_fix2() {
  // float -> _Accum: scale by 2^15, then fptosi to i32.
  a = fl;
}

// CHECK-LABEL: @float_fix3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 0x41E0000000000000
// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i64
// CHECK-NEXT: store i64 [[TMP2]], i64* @la, align 8
// CHECK-NEXT: ret void
//
void float_fix3() {
  // float -> long _Accum: scale by 2^31, then fptosi to i64.
  la = fl;
}

// CHECK-LABEL: @float_fix4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02
// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i8
// CHECK-NEXT: store i8 [[TMP2]], i8* @sf, align 1
// CHECK-NEXT: ret void
//
void float_fix4() {
  // float -> short _Fract: scale by 2^7, then fptosi to i8.
  sf = fl;
}

// CHECK-LABEL: @float_fix5(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 0x41E0000000000000
// CHECK-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i32
// CHECK-NEXT: store i32 [[TMP2]], i32* @lf, align 4
// CHECK-NEXT: ret void
//
void float_fix5() {
  // float -> long _Fract: scale by 2^31, then fptosi to i32.
  lf = fl;
}

// SIGNED-LABEL: @float_fix6(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04
// SIGNED-NEXT: [[TMP2:%.*]] = fptoui float [[TMP1]] to i32
// SIGNED-NEXT: store i32 [[TMP2]], i32* @ua, align 4
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @float_fix6(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04
// UNSIGNED-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i32
// UNSIGNED-NEXT: store i32 [[TMP2]], i32* @ua, align 4
// UNSIGNED-NEXT: ret void
//
void float_fix6() {
  // float -> unsigned _Accum: scale 2^16 + fptoui normally; with unsigned
  // padding there is one fewer fractional bit, so scale 2^15 + fptosi.
  ua = fl;
}

// SIGNED-LABEL: @float_fix7(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04
// SIGNED-NEXT: [[TMP2:%.*]] = fptoui float [[TMP1]] to i16
// SIGNED-NEXT: store i16 [[TMP2]], i16* @uf, align 2
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @float_fix7(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04
// UNSIGNED-NEXT: [[TMP2:%.*]] = fptosi float [[TMP1]] to i16
// UNSIGNED-NEXT: store i16 [[TMP2]], i16* @uf, align 2
// UNSIGNED-NEXT: ret void
//
void float_fix7() {
  // float -> unsigned _Fract: scale 2^16 + fptoui normally; with unsigned
  // padding, scale 2^15 + fptosi instead.
  uf = fl;
}


// CHECK-LABEL: @fix_float1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* @sa, align 2
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i16 [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 7.812500e-03
// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4
// CHECK-NEXT: ret void
//
void fix_float1() {
  // short _Accum -> float: sitofp the raw i16, then multiply by 2^-7.
  fl = sa;
}

// CHECK-LABEL: @fix_float2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000
// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4
// CHECK-NEXT: ret void
//
void fix_float2() {
  // _Accum -> float: sitofp the raw i32, then multiply by 2^-15.
  fl = a;
}

// CHECK-LABEL: @fix_float3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @la, align 8
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i64 [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000
// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4
// CHECK-NEXT: ret void
//
void fix_float3() {
  // long _Accum -> float: sitofp the raw i64, then multiply by 2^-31.
  fl = la;
}

// CHECK-LABEL: @fix_float4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* @sf, align 1
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i8 [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 7.812500e-03
// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4
// CHECK-NEXT: ret void
//
void fix_float4() {
  // short _Fract -> float: sitofp the raw i8, then multiply by 2^-7.
  fl = sf;
}

// CHECK-LABEL: @fix_float5(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @lf, align 4
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000
// CHECK-NEXT: store float [[TMP2]], float* @fl, align 4
// CHECK-NEXT: ret void
//
void fix_float5() {
  // long _Fract -> float: sitofp the raw i32, then multiply by 2^-31.
  fl = lf;
}

// SIGNED-LABEL: @fix_float6(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load i32, i32* @ua, align 4
// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i32 [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3EF0000000000000
// SIGNED-NEXT: store float [[TMP2]], float* @fl, align 4
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @fix_float6(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load i32, i32* @ua, align 4
// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i32 [[TMP0]] to float
// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000
// UNSIGNED-NEXT: store float [[TMP2]], float* @fl, align 4
// UNSIGNED-NEXT: ret void
//
void fix_float6() {
  // unsigned _Accum -> float: uitofp, then multiply by 2^-16 (2^-15 when
  // unsigned types carry a padding bit).
  fl = ua;
}

// SIGNED-LABEL: @fix_float7(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2
// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3EF0000000000000
// SIGNED-NEXT: store float [[TMP2]], float* @fl, align 4
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @fix_float7(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @uf, align 2
// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to float
// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3F00000000000000
// UNSIGNED-NEXT: store float [[TMP2]], float* @fl, align 4
// UNSIGNED-NEXT: ret void
//
void fix_float7() {
  // unsigned _Fract -> float: uitofp, then multiply by 2^-16 (2^-15 when
  // unsigned types carry a padding bit).
  fl = uf;
}


// CHECK-LABEL: @float_sat1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02
// CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f32(float [[TMP1]])
// CHECK-NEXT: store i16 [[TMP2]], i16* @sat_sa, align 2
// CHECK-NEXT: ret void
//
void float_sat1() {
  // Saturating float -> _Sat short _Accum via llvm.fptosi.sat.i16.f32.
  sat_sa = fl;
}

// CHECK-LABEL: @float_sat2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[TMP1]])
// CHECK-NEXT: store i32 [[TMP2]], i32* @sat_a, align 4
// CHECK-NEXT: ret void
//
void float_sat2() {
  // Saturating float -> _Sat _Accum via llvm.fptosi.sat.i32.f32.
  sat_a = fl;
}

// CHECK-LABEL: @float_sat3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 0x41E0000000000000
// CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.fptosi.sat.i64.f32(float [[TMP1]])
// CHECK-NEXT: store i64 [[TMP2]], i64* @sat_la, align 8
// CHECK-NEXT: ret void
//
void float_sat3() {
  // Saturating float -> _Sat long _Accum via llvm.fptosi.sat.i64.f32.
  sat_la = fl;
}

// CHECK-LABEL: @float_sat4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// CHECK-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 1.280000e+02
// CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.fptosi.sat.i8.f32(float [[TMP1]])
// CHECK-NEXT: store i8 [[TMP2]], i8* @sat_sf, align 1
// CHECK-NEXT: ret void
//
void float_sat4() {
  // Saturating float -> _Sat short _Fract via llvm.fptosi.sat.i8.f32.
  sat_sf = fl;
}

// SIGNED-LABEL: @float_sat5(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04
// SIGNED-NEXT: [[TMP2:%.*]] = call i32 @llvm.fptoui.sat.i32.f32(float [[TMP1]])
// SIGNED-NEXT: store i32 [[TMP2]], i32* @sat_ua, align 4
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @float_sat5(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04
// UNSIGNED-NEXT: [[TMP2:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[TMP1]])
// UNSIGNED-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 0
// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
// UNSIGNED-NEXT: store i32 [[SATMIN]], i32* @sat_ua, align 4
// UNSIGNED-NEXT: ret void
//
void float_sat5() {
  // Saturating float -> _Sat unsigned _Accum: fptoui.sat normally; with
  // unsigned padding, fptosi.sat plus an explicit clamp of negatives to zero.
  sat_ua = fl;
}

// SIGNED-LABEL: @float_sat6(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// SIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 6.553600e+04
// SIGNED-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptoui.sat.i16.f32(float [[TMP1]])
// SIGNED-NEXT: store i16 [[TMP2]], i16* @sat_uf, align 2
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @float_sat6(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load float, float* @fl, align 4
// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul float [[TMP0]], 3.276800e+04
// UNSIGNED-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f32(float [[TMP1]])
// UNSIGNED-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 0
// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
// UNSIGNED-NEXT: store i16 [[SATMIN]], i16* @sat_uf, align 2
// UNSIGNED-NEXT: ret void
//
void float_sat6() {
  // Saturating float -> _Sat unsigned _Fract: fptoui.sat normally; with
  // unsigned padding, fptosi.sat plus an explicit clamp of negatives to zero.
  sat_uf = fl;
}
309 changes: 309 additions & 0 deletions clang/test/Frontend/fixed_point_conversions_half.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,309 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -ffixed-point -triple arm64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED
// RUN: %clang_cc1 -ffixed-point -triple arm64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED

// Fixed-point globals used as conversion sources/destinations; FileCheck
// matches the generated loads/stores against these @-symbols.
short _Fract sf;
long _Fract lf;

short _Accum sa;
long _Accum la;

unsigned short _Accum usa;
unsigned long _Accum ula;

// Saturating variants.
_Sat short _Fract sf_sat;
_Sat long _Fract lf_sat;

_Sat short _Accum sa_sat;
_Sat long _Accum la_sat;

_Sat unsigned short _Accum usa_sat;
_Sat unsigned long _Accum ula_sat;

// Half-precision floating-point operand.
_Float16 h;


// CHECK-LABEL: @half_fix1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800
// CHECK-NEXT: [[TMP2:%.*]] = fptosi half [[TMP1]] to i8
// CHECK-NEXT: store i8 [[TMP2]], i8* @sf, align 1
// CHECK-NEXT: ret void
//
void half_fix1() {
  // _Float16 -> short _Fract: the 2^7 scale fits in half, so convert directly.
  sf = h;
}

// CHECK-LABEL: @half_fix2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000
// CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i32
// CHECK-NEXT: store i32 [[TMP3]], i32* @lf, align 4
// CHECK-NEXT: ret void
//
void half_fix2() {
  // _Float16 -> long _Fract: 2^31 is not representable in half, so the value
  // is widened to float before scaling.
  lf = h;
}

// CHECK-LABEL: @half_fix3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800
// CHECK-NEXT: [[TMP2:%.*]] = fptosi half [[TMP1]] to i16
// CHECK-NEXT: store i16 [[TMP2]], i16* @sa, align 2
// CHECK-NEXT: ret void
//
void half_fix3() {
  // _Float16 -> short _Accum: the 2^7 scale fits in half; no widening needed.
  sa = h;
}

// CHECK-LABEL: @half_fix4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000
// CHECK-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i64
// CHECK-NEXT: store i64 [[TMP3]], i64* @la, align 8
// CHECK-NEXT: ret void
//
void half_fix4() {
  // _Float16 -> long _Accum: widened to float for the 2^31 scale.
  la = h;
}

// SIGNED-LABEL: @half_fix5(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 2.560000e+02
// SIGNED-NEXT: [[TMP3:%.*]] = fptoui float [[TMP2]] to i16
// SIGNED-NEXT: store i16 [[TMP3]], i16* @usa, align 2
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @half_fix5(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800
// UNSIGNED-NEXT: [[TMP2:%.*]] = fptosi half [[TMP1]] to i16
// UNSIGNED-NEXT: store i16 [[TMP2]], i16* @usa, align 2
// UNSIGNED-NEXT: ret void
//
void half_fix5() {
  // _Float16 -> unsigned short _Accum: the 2^8 scale needs float when there
  // is no padding bit; with padding the 2^7 scale fits in half directly.
  usa = h;
}

// SIGNED-LABEL: @half_fix6(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41F0000000000000
// SIGNED-NEXT: [[TMP3:%.*]] = fptoui float [[TMP2]] to i64
// SIGNED-NEXT: store i64 [[TMP3]], i64* @ula, align 8
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @half_fix6(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// UNSIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000
// UNSIGNED-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i64
// UNSIGNED-NEXT: store i64 [[TMP3]], i64* @ula, align 8
// UNSIGNED-NEXT: ret void
//
void half_fix6() {
  // _Float16 -> unsigned long _Accum: widened to float in both modes
  // (scale 2^32 without padding, 2^31 with).
  ula = h;
}


// CHECK-LABEL: @half_sat1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800
// CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.fptosi.sat.i8.f16(half [[TMP1]])
// CHECK-NEXT: store i8 [[TMP2]], i8* @sf_sat, align 1
// CHECK-NEXT: ret void
//
void half_sat1() {
  // Saturating _Float16 -> _Sat short _Fract via llvm.fptosi.sat.i8.f16;
  // the scale fits in half so no widening occurs.
  sf_sat = h;
}

// CHECK-LABEL: @half_sat2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.fptosi.sat.i32.f32(float [[TMP2]])
// CHECK-NEXT: store i32 [[TMP3]], i32* @lf_sat, align 4
// CHECK-NEXT: ret void
//
void half_sat2() {
  // Saturating _Float16 -> _Sat long _Fract: widen to float, then
  // llvm.fptosi.sat.i32.f32.
  lf_sat = h;
}

// CHECK-LABEL: @half_sat3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800
// CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f16(half [[TMP1]])
// CHECK-NEXT: store i16 [[TMP2]], i16* @sa_sat, align 2
// CHECK-NEXT: ret void
//
void half_sat3() {
  // Saturating _Float16 -> _Sat short _Accum via llvm.fptosi.sat.i16.f16.
  sa_sat = h;
}

// CHECK-LABEL: @half_sat4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// CHECK-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000
// CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.fptosi.sat.i64.f32(float [[TMP2]])
// CHECK-NEXT: store i64 [[TMP3]], i64* @la_sat, align 8
// CHECK-NEXT: ret void
//
void half_sat4() {
  // Saturating _Float16 -> _Sat long _Accum: widen to float, then
  // llvm.fptosi.sat.i64.f32.
  la_sat = h;
}

// SIGNED-LABEL: @half_sat5(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 2.560000e+02
// SIGNED-NEXT: [[TMP3:%.*]] = call i16 @llvm.fptoui.sat.i16.f32(float [[TMP2]])
// SIGNED-NEXT: store i16 [[TMP3]], i16* @usa_sat, align 2
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @half_sat5(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// UNSIGNED-NEXT: [[TMP1:%.*]] = fmul half [[TMP0]], 0xH5800
// UNSIGNED-NEXT: [[TMP2:%.*]] = call i16 @llvm.fptosi.sat.i16.f16(half [[TMP1]])
// UNSIGNED-NEXT: [[TMP3:%.*]] = icmp slt i16 [[TMP2]], 0
// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP2]]
// UNSIGNED-NEXT: store i16 [[SATMIN]], i16* @usa_sat, align 2
// UNSIGNED-NEXT: ret void
//
void half_sat5() {
  // Saturating _Float16 -> _Sat unsigned short _Accum: fptoui.sat on float
  // normally; with padding, fptosi.sat on half plus a clamp of negatives to 0.
  usa_sat = h;
}

// SIGNED-LABEL: @half_sat6(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// SIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41F0000000000000
// SIGNED-NEXT: [[TMP3:%.*]] = call i64 @llvm.fptoui.sat.i64.f32(float [[TMP2]])
// SIGNED-NEXT: store i64 [[TMP3]], i64* @ula_sat, align 8
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @half_sat6(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load half, half* @h, align 2
// UNSIGNED-NEXT: [[TMP1:%.*]] = fpext half [[TMP0]] to float
// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x41E0000000000000
// UNSIGNED-NEXT: [[TMP3:%.*]] = call i64 @llvm.fptosi.sat.i64.f32(float [[TMP2]])
// UNSIGNED-NEXT: [[TMP4:%.*]] = icmp slt i64 [[TMP3]], 0
// UNSIGNED-NEXT: [[SATMIN:%.*]] = select i1 [[TMP4]], i64 0, i64 [[TMP3]]
// UNSIGNED-NEXT: store i64 [[SATMIN]], i64* @ula_sat, align 8
// UNSIGNED-NEXT: ret void
//
void half_sat6() {
  // Saturating _Float16 -> _Sat unsigned long _Accum: fptoui.sat normally;
  // with padding, fptosi.sat plus a clamp of negatives to zero.
  ula_sat = h;
}


// CHECK-LABEL: @fix_half1(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* @sf, align 1
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i8 [[TMP0]] to half
// CHECK-NEXT: [[TMP2:%.*]] = fmul half [[TMP1]], 0xH2000
// CHECK-NEXT: store half [[TMP2]], half* @h, align 2
// CHECK-NEXT: ret void
//
void fix_half1() {
  // short _Fract -> _Float16: sitofp i8 to half, then multiply by 2^-7.
  h = sf;
}

// CHECK-LABEL: @fix_half2(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @lf, align 4
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i32 [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000
// CHECK-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half
// CHECK-NEXT: store half [[TMP3]], half* @h, align 2
// CHECK-NEXT: ret void
//
void fix_half2() {
  // long _Fract -> _Float16: goes through float (scale 2^-31), then fptrunc.
  h = lf;
}

// CHECK-LABEL: @fix_half3(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* @sa, align 2
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i16 [[TMP0]] to half
// CHECK-NEXT: [[TMP2:%.*]] = fmul half [[TMP1]], 0xH2000
// CHECK-NEXT: store half [[TMP2]], half* @h, align 2
// CHECK-NEXT: ret void
//
void fix_half3() {
  // short _Accum -> _Float16: sitofp i16 to half directly, scale 2^-7.
  h = sa;
}

// CHECK-LABEL: @fix_half4(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @la, align 8
// CHECK-NEXT: [[TMP1:%.*]] = sitofp i64 [[TMP0]] to float
// CHECK-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000
// CHECK-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half
// CHECK-NEXT: store half [[TMP3]], half* @h, align 2
// CHECK-NEXT: ret void
//
void fix_half4() {
  // long _Accum -> _Float16: via float (scale 2^-31), then fptrunc to half.
  h = la;
}

// SIGNED-LABEL: @fix_half5(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @usa, align 2
// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 3.906250e-03
// SIGNED-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half
// SIGNED-NEXT: store half [[TMP3]], half* @h, align 2
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @fix_half5(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load i16, i16* @usa, align 2
// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i16 [[TMP0]] to half
// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul half [[TMP1]], 0xH2000
// UNSIGNED-NEXT: store half [[TMP2]], half* @h, align 2
// UNSIGNED-NEXT: ret void
//
void fix_half5() {
  // unsigned short _Accum -> _Float16: via float (2^-8) without padding;
  // directly in half (2^-7) when a padding bit is present.
  h = usa;
}

// SIGNED-LABEL: @fix_half6(
// SIGNED-NEXT: entry:
// SIGNED-NEXT: [[TMP0:%.*]] = load i64, i64* @ula, align 8
// SIGNED-NEXT: [[TMP1:%.*]] = uitofp i64 [[TMP0]] to float
// SIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3DF0000000000000
// SIGNED-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half
// SIGNED-NEXT: store half [[TMP3]], half* @h, align 2
// SIGNED-NEXT: ret void
//
// UNSIGNED-LABEL: @fix_half6(
// UNSIGNED-NEXT: entry:
// UNSIGNED-NEXT: [[TMP0:%.*]] = load i64, i64* @ula, align 8
// UNSIGNED-NEXT: [[TMP1:%.*]] = uitofp i64 [[TMP0]] to float
// UNSIGNED-NEXT: [[TMP2:%.*]] = fmul float [[TMP1]], 0x3E00000000000000
// UNSIGNED-NEXT: [[TMP3:%.*]] = fptrunc float [[TMP2]] to half
// UNSIGNED-NEXT: store half [[TMP3]], half* @h, align 2
// UNSIGNED-NEXT: ret void
//
void fix_half6() {
  // unsigned long _Accum -> _Float16: via float in both modes
  // (scale 2^-32 without padding, 2^-31 with), then fptrunc.
  h = ula;
}
59 changes: 59 additions & 0 deletions llvm/include/llvm/IR/FixedPointBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ template <class IRBuilderTy> class FixedPointBuilder {
C.isSigned(), C.isSaturated(), BothPadded);
}

/// Pick a floating-point type able to represent every value of the
/// fixed-point semantic \p Sema: either \p Ty itself, or the result of
/// successively promoting \p Ty's semantics until \p Sema fits.
Type *getAccommodatingFloatType(Type *Ty, const FixedPointSemantics &Sema) {
  const fltSemantics *Candidate = &Ty->getFltSemantics();
  while (!Sema.fitsInFloatSemantics(*Candidate))
    Candidate = APFixedPoint::promoteFloatSemantics(Candidate);
  return Type::getFloatingPointTy(Ty->getContext(), *Candidate);
}

public:
FixedPointBuilder(IRBuilderTy &Builder) : B(Builder) {}

Expand Down Expand Up @@ -159,6 +169,55 @@ template <class IRBuilderTy> class FixedPointBuilder {
DstSema, false);
}

/// Convert the fixed-point value \p Src, interpreted with semantics
/// \p SrcSema, into the floating-point type \p DstTy.
Value *CreateFixedToFloating(Value *Src, const FixedPointSemantics &SrcSema,
                             Type *DstTy) {
  // Work in a float type wide enough for the fixed-point semantic; this is
  // DstTy itself whenever DstTy already accommodates it.
  Type *WorkTy = getAccommodatingFloatType(DstTy, SrcSema);
  // Reinterpret the raw underlying integer as floating point. A value too
  // wide for the significand is rounded rather than truncated.
  Value *Val = SrcSema.isSigned() ? B.CreateSIToFP(Src, WorkTy)
                                  : B.CreateUIToFP(Src, WorkTy);
  // Undo the fixed-point scaling by multiplying with 2^-scale. This is
  // lossless apart from (unlikely) overflow to infinity.
  Val = B.CreateFMul(
      Val, ConstantFP::get(WorkTy, std::pow(2, -(int)SrcSema.getScale())));
  // Narrow back down if we had to widen beyond DstTy.
  return WorkTy == DstTy ? Val : B.CreateFPTrunc(Val, DstTy);
}

/// Convert the floating-point value \p Src into a fixed-point value with
/// semantics \p DstSema, honoring saturation when requested.
Value *CreateFloatingToFixed(Value *Src, const FixedPointSemantics &DstSema) {
  // Unsigned-with-padding destinations are handled with signed operations.
  bool UseSigned = DstSema.isSigned() || DstSema.hasUnsignedPadding();
  // Widen the source if its float type cannot accommodate the destination
  // semantic.
  Type *WorkTy = getAccommodatingFloatType(Src->getType(), DstSema);
  Value *Val = Src;
  if (WorkTy != Src->getType())
    Val = B.CreateFPExt(Val, WorkTy);
  // Move the significant bits (for conversion purposes) into the integral
  // range by multiplying with 2^scale.
  Val = B.CreateFMul(Val,
                     ConstantFP::get(WorkTy, std::pow(2, DstSema.getScale())));

  Type *IntTy = B.getIntNTy(DstSema.getWidth());
  if (!DstSema.isSaturated()) {
    Val = UseSigned ? B.CreateFPToSI(Val, IntTy) : B.CreateFPToUI(Val, IntTy);
  } else {
    Intrinsic::ID IID =
        UseSigned ? Intrinsic::fptosi_sat : Intrinsic::fptoui_sat;
    Val = B.CreateIntrinsic(IID, {IntTy, WorkTy}, {Val});
    // Saturating an unsigned-with-padding type through the signed intrinsic
    // may still yield negative values; clamp those to zero.
    if (DstSema.hasUnsignedPadding()) {
      Constant *Zero = Constant::getNullValue(Val->getType());
      Val = B.CreateSelect(B.CreateICmpSLT(Val, Zero), Zero, Val, "satmin");
    }
  }

  return Val;
}

/// Add two fixed-point values and return the result in their common semantic.
/// \p LHS - The left hand side
/// \p LHSSema - The semantic of the left hand side
Expand Down
13 changes: 6 additions & 7 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48754,11 +48754,10 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector())
return SDValue();

// PSUBUS is supported, starting from SSE2, but truncation for v8i32
// is only worth it with SSSE3 (PSHUFB).
// PSUBUS is supported, starting from SSE2.
EVT EltVT = VT.getVectorElementType();
if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16)) &&
!(Subtarget.hasSSSE3() && (VT == MVT::v8i32 || VT == MVT::v8i64)) &&
if (!(Subtarget.hasSSE2() && (EltVT == MVT::i8 || EltVT == MVT::i16 ||
VT == MVT::v8i32 || VT == MVT::v8i64)) &&
!(Subtarget.useBWIRegs() && (VT == MVT::v16i32)))
return SDValue();

Expand Down Expand Up @@ -48795,8 +48794,8 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
SDValue MinLHS = Op1.getOperand(0).getOperand(0);
SDValue MinRHS = Op1.getOperand(0).getOperand(1);
EVT TruncVT = Op1.getOperand(0).getValueType();
if (!(Subtarget.hasSSSE3() && (TruncVT == MVT::v8i32 ||
TruncVT == MVT::v8i64)) &&
if (!(Subtarget.hasSSE2() &&
(TruncVT == MVT::v8i32 || TruncVT == MVT::v8i64)) &&
!(Subtarget.useBWIRegs() && (TruncVT == MVT::v16i32)))
return SDValue();
SDValue OpToSaturate;
Expand Down Expand Up @@ -48835,7 +48834,7 @@ static SDValue combineSubToSubus(SDNode *N, SelectionDAG &DAG,
// values, or first 48 bits for 64 bit values.
KnownBits Known = DAG.computeKnownBits(SubusLHS);
unsigned NumZeros = Known.countMinLeadingZeros();
if ((VT == MVT::v8i64 && NumZeros < 48) || NumZeros < 16)
if (NumZeros < (VT.getScalarSizeInBits() - 16))
return SDValue();

EVT ExtType = SubusLHS.getValueType();
Expand Down
293 changes: 132 additions & 161 deletions llvm/test/CodeGen/X86/psubus.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1382,33 +1382,32 @@ vector.ph:
define <8 x i16> @psubus_8i32_max(<8 x i16> %x, <8 x i32> %y) nounwind {
; SSE2-LABEL: psubus_8i32_max:
; SSE2: # %bb.0: # %vector.ph
; SSE2-NEXT: movdqa %xmm0, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7]
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: por %xmm5, %xmm4
; SSE2-NEXT: pcmpgtd %xmm6, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pandn %xmm2, %xmm4
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm5, %xmm3
; SSE2-NEXT: por %xmm0, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pxor %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pand %xmm6, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: por %xmm2, %xmm6
; SSE2-NEXT: pslld $16, %xmm6
; SSE2-NEXT: psrad $16, %xmm6
; SSE2-NEXT: pxor %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: pand %xmm5, %xmm0
; SSE2-NEXT: pandn %xmm1, %xmm5
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm2, %xmm4
; SSE2-NEXT: pslld $16, %xmm4
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pand %xmm1, %xmm5
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pslld $16, %xmm5
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: packssdw %xmm6, %xmm5
; SSE2-NEXT: psubusw %xmm5, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm4, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: psubus_8i32_max:
Expand Down Expand Up @@ -1483,91 +1482,72 @@ vector.ph:
define <8 x i16> @psubus_8i64_max(<8 x i16> %x, <8 x i64> %y) nounwind {
; SSE2-LABEL: psubus_8i64_max:
; SSE2: # %bb.0: # %vector.ph
; SSE2-NEXT: pxor %xmm5, %xmm5
; SSE2-NEXT: movdqa %xmm0, %xmm10
; SSE2-NEXT: punpckhwd {{.*#+}} xmm10 = xmm10[4],xmm5[4],xmm10[5],xmm5[5],xmm10[6],xmm5[6],xmm10[7],xmm5[7]
; SSE2-NEXT: movdqa %xmm10, %xmm8
; SSE2-NEXT: punpckldq {{.*#+}} xmm8 = xmm8[0],xmm5[0],xmm8[1],xmm5[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm10 = xmm10[2],xmm5[2],xmm10[3],xmm5[3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-NEXT: movdqa %xmm0, %xmm9
; SSE2-NEXT: punpckldq {{.*#+}} xmm9 = xmm9[0],xmm5[0],xmm9[1],xmm5[1]
; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm5[2],xmm0[3],xmm5[3]
; SSE2-NEXT: movdqa {{.*#+}} xmm11 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm11, %xmm6
; SSE2-NEXT: movdqa %xmm0, %xmm7
; SSE2-NEXT: por %xmm11, %xmm7
; SSE2-NEXT: movdqa %xmm7, %xmm5
; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm5[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
; SSE2-NEXT: pand %xmm12, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm13 = xmm5[1,1,3,3]
; SSE2-NEXT: por %xmm7, %xmm13
; SSE2-NEXT: pand %xmm13, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm13
; SSE2-NEXT: por %xmm0, %xmm13
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm11, %xmm0
; SSE2-NEXT: movdqa %xmm9, %xmm5
; SSE2-NEXT: por %xmm11, %xmm5
; SSE2-NEXT: movdqa %xmm5, %xmm7
; SSE2-NEXT: pcmpgtd %xmm0, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm7[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm0, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT: pand %xmm12, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: pand %xmm0, %xmm9
; SSE2-NEXT: pandn %xmm1, %xmm0
; SSE2-NEXT: por %xmm9, %xmm0
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pxor %xmm11, %xmm5
; SSE2-NEXT: movdqa %xmm10, %xmm7
; SSE2-NEXT: por %xmm11, %xmm7
; SSE2-NEXT: movdqa %xmm7, %xmm6
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
; SSE2-NEXT: pand %xmm9, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm5, %xmm7
; SSE2-NEXT: pand %xmm7, %xmm10
; SSE2-NEXT: pandn %xmm4, %xmm7
; SSE2-NEXT: por %xmm10, %xmm7
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: pxor %xmm11, %xmm5
; SSE2-NEXT: por %xmm8, %xmm11
; SSE2-NEXT: movdqa %xmm11, %xmm6
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [9223372039002259456,9223372039002259456]
; SSE2-NEXT: movdqa %xmm2, %xmm7
; SSE2-NEXT: pxor %xmm5, %xmm7
; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [9223372039002324991,9223372039002324991]
; SSE2-NEXT: movdqa %xmm8, %xmm6
; SSE2-NEXT: pcmpgtd %xmm7, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm5, %xmm11
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm11[1,1,3,3]
; SSE2-NEXT: pand %xmm9, %xmm5
; SSE2-NEXT: pcmpeqd %xmm8, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
; SSE2-NEXT: pand %xmm9, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm5, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm8
; SSE2-NEXT: pandn %xmm3, %xmm6
; SSE2-NEXT: por %xmm8, %xmm6
; SSE2-NEXT: psubq %xmm3, %xmm6
; SSE2-NEXT: psubq %xmm4, %xmm7
; SSE2-NEXT: psubq %xmm1, %xmm0
; SSE2-NEXT: psubq %xmm2, %xmm13
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm13[0,2,2,3]
; SSE2-NEXT: por %xmm7, %xmm6
; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [65535,65535]
; SSE2-NEXT: pand %xmm6, %xmm2
; SSE2-NEXT: pandn %xmm9, %xmm6
; SSE2-NEXT: por %xmm2, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm10 = xmm2[0,2,2,3,4,5,6,7]
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: pxor %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm8, %xmm7
; SSE2-NEXT: pcmpgtd %xmm6, %xmm7
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm8, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT: pand %xmm2, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm7[1,1,3,3]
; SSE2-NEXT: por %xmm6, %xmm2
; SSE2-NEXT: pand %xmm2, %xmm1
; SSE2-NEXT: pandn %xmm9, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm7[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,1,0,2,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm10[0],xmm1[1],xmm10[1]
; SSE2-NEXT: movdqa %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm5, %xmm2
; SSE2-NEXT: movdqa %xmm8, %xmm6
; SSE2-NEXT: pcmpgtd %xmm2, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm8, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE2-NEXT: pand %xmm7, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
; SSE2-NEXT: por %xmm2, %xmm6
; SSE2-NEXT: pand %xmm6, %xmm4
; SSE2-NEXT: pandn %xmm9, %xmm6
; SSE2-NEXT: por %xmm4, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,1,0,2,4,5,6,7]
; SSE2-NEXT: pxor %xmm3, %xmm5
; SSE2-NEXT: movdqa %xmm8, %xmm4
; SSE2-NEXT: pcmpgtd %xmm5, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm4[0,0,2,2]
; SSE2-NEXT: pcmpeqd %xmm8, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
; SSE2-NEXT: pand %xmm6, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT: por %xmm5, %xmm4
; SSE2-NEXT: pand %xmm4, %xmm3
; SSE2-NEXT: pandn %xmm9, %xmm4
; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
; SSE2-NEXT: pshuflw {{.*#+}} xmm3 = xmm3[0,1,0,2,4,5,6,7]
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: movsd {{.*#+}} xmm3 = xmm1[0],xmm3[1]
; SSE2-NEXT: psubusw %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: psubus_8i64_max:
Expand Down Expand Up @@ -1943,35 +1923,32 @@ vector.ph:
define <8 x i16> @psubus_i16_i32_max_swapped(<8 x i16> %x, <8 x i32> %y) nounwind {
; SSE2-LABEL: psubus_i16_i32_max_swapped:
; SSE2: # %bb.0: # %vector.ph
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm5, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm6
; SSE2-NEXT: por %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm6, %xmm3
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pand %xmm3, %xmm6
; SSE2-NEXT: pandn %xmm0, %xmm3
; SSE2-NEXT: por %xmm6, %xmm3
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm5, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pxor %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pand %xmm6, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: por %xmm2, %xmm6
; SSE2-NEXT: pslld $16, %xmm6
; SSE2-NEXT: psrad $16, %xmm6
; SSE2-NEXT: pxor %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pand %xmm1, %xmm5
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm5, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: pand %xmm0, %xmm5
; SSE2-NEXT: pandn %xmm4, %xmm0
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: psubd %xmm2, %xmm3
; SSE2-NEXT: pslld $16, %xmm3
; SSE2-NEXT: psrad $16, %xmm3
; SSE2-NEXT: pslld $16, %xmm0
; SSE2-NEXT: pslld $16, %xmm5
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: packssdw %xmm6, %xmm5
; SSE2-NEXT: psubusw %xmm5, %xmm0
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: packssdw %xmm3, %xmm0
; SSE2-NEXT: packssdw %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: psubus_i16_i32_max_swapped:
Expand Down Expand Up @@ -2046,33 +2023,27 @@ vector.ph:
define <8 x i16> @psubus_i16_i32_min(<8 x i16> %x, <8 x i32> %y) nounwind {
; SSE2-LABEL: psubus_i16_i32_min:
; SSE2: # %bb.0: # %vector.ph
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSE2-NEXT: movdqa %xmm0, %xmm5
; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1],xmm5[2],xmm3[2],xmm5[3],xmm3[3]
; SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm1, %xmm3
; SSE2-NEXT: pxor %xmm6, %xmm3
; SSE2-NEXT: movdqa %xmm5, %xmm7
; SSE2-NEXT: por %xmm6, %xmm7
; SSE2-NEXT: pcmpgtd %xmm7, %xmm3
; SSE2-NEXT: pand %xmm3, %xmm5
; SSE2-NEXT: pandn %xmm1, %xmm3
; SSE2-NEXT: por %xmm5, %xmm3
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm6, %xmm1
; SSE2-NEXT: por %xmm4, %xmm6
; SSE2-NEXT: pcmpgtd %xmm6, %xmm1
; SSE2-NEXT: pand %xmm1, %xmm4
; SSE2-NEXT: pandn %xmm2, %xmm1
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: pslld $16, %xmm1
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: pslld $16, %xmm3
; SSE2-NEXT: psrad $16, %xmm3
; SSE2-NEXT: packssdw %xmm1, %xmm3
; SSE2-NEXT: psubw %xmm3, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: pxor %xmm3, %xmm4
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
; SSE2-NEXT: movdqa %xmm5, %xmm6
; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pand %xmm6, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: por %xmm2, %xmm6
; SSE2-NEXT: pslld $16, %xmm6
; SSE2-NEXT: psrad $16, %xmm6
; SSE2-NEXT: pxor %xmm1, %xmm3
; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
; SSE2-NEXT: pxor %xmm5, %xmm4
; SSE2-NEXT: pand %xmm1, %xmm5
; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pslld $16, %xmm5
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: packssdw %xmm6, %xmm5
; SSE2-NEXT: psubusw %xmm5, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: psubus_i16_i32_min:
Expand Down