Skip to content

Commit

Permalink
[TargetLowering] expandFP_TO_UINT - avoid FPE due to out of range con…
Browse files Browse the repository at this point in the history
…version (PR17686)

PR17686 demonstrates that for some targets FP exceptions can fire in cases where the FP_TO_UINT is expanded using a FP_TO_SINT instruction.

The existing code converts both the in-range and out-of-range cases using FP_TO_SINT and then selects the result; this patch changes this for 'strict' cases to pre-select the FP_TO_SINT input and the offset adjustment.

The X87 cases don't need the strict flag but generate much nicer code with it.

Differential Revision: https://reviews.llvm.org/D53794

llvm-svn: 348251
  • Loading branch information
RKSimon committed Dec 4, 2018
1 parent eecf487 commit 0add090
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 130 deletions.
10 changes: 10 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Expand Up @@ -1746,6 +1746,16 @@ class TargetLoweringBase {
return false;
}

/// Return true if it is more correct/profitable to use strict FP_TO_INT
/// conversion operations - canonicalizing the FP source value instead of
/// converting all cases and then selecting based on value.
/// This may be true if the target throws exceptions for out of bounds
/// conversions or has fast FP CMOV.
///
/// \param FpVT the floating-point type being converted from.
/// \param IntVT the integer type being converted to.
/// \param IsSigned true for a signed conversion, false for an unsigned one.
/// \returns false in this default implementation; a target opts in by
/// overriding the hook.
virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
bool IsSigned) const {
return false;
}

//===--------------------------------------------------------------------===//
// TargetLowering Configuration Methods - These methods should be invoked by
// the derived class constructor to configure this object for the target.
Expand Down
41 changes: 30 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Expand Up @@ -4200,20 +4200,39 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
return true;
}

// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
// Result = select (Src < 0x8000000000000000), True, False
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);

SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
// TODO: Should any fast-math-flags be set for the FSUB?
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
DAG.getConstant(SignMask, dl, DstVT));
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
if (Strict) {
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
// signmask then offset (the result of which should be fully representable).
// Sel = Src < 0x8000000000000000
// Val = select Sel, Src, Src - 0x8000000000000000
// Ofs = select Sel, 0, 0x8000000000000000
// Result = fp_to_sint(Val) ^ Ofs

// TODO: Should any fast-math-flags be set for the FSUB?
SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
DAG.getConstant(SignMask, dl, DstVT));
Result = DAG.getNode(ISD::XOR, dl, DstVT,
DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
// False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
// Result = select (Src < 0x8000000000000000), True, False

SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
// TODO: Should any fast-math-flags be set for the FSUB?
SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
False = DAG.getNode(ISD::XOR, dl, DstVT, False,
DAG.getConstant(SignMask, dl, DstVT));
Result = DAG.getSelect(dl, DstVT, Sel, True, False);
}
return true;
}

Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -4812,6 +4812,12 @@ bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
(1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
}

/// Request the strict FP_TO_INT expansion only for unsigned conversions whose
/// source type is f80, and only when the subtarget reports CMOV support.
/// IntVT is not consulted.
bool X86TargetLowering::shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
bool IsSigned) const {
// f80 FP_TO_UINT is more efficient using Strict code if FCMOV is available.
return !IsSigned && FpVT == MVT::f80 && Subtarget.hasCMov();
}

bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -1047,6 +1047,9 @@ namespace llvm {

bool decomposeMulByConstant(EVT VT, SDValue C) const override;

bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
bool IsSigned) const override;

/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
Expand Down
88 changes: 32 additions & 56 deletions llvm/test/CodeGen/X86/fp-cvt.ll
Expand Up @@ -483,29 +483,20 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
; X64-X87-NEXT: flds {{.*}}(%rip)
; X64-X87-NEXT: fld %st(1)
; X64-X87-NEXT: fsub %st(1)
; X64-X87-NEXT: xorl %eax, %eax
; X64-X87-NEXT: fxch %st(1)
; X64-X87-NEXT: fucompi %st(2)
; X64-X87-NEXT: fcmovnbe %st(1), %st(0)
; X64-X87-NEXT: fstp %st(1)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fld %st(1)
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fucompi %st(1)
; X64-X87-NEXT: fstp %st(0)
; X64-X87-NEXT: jbe .LBB10_1
; X64-X87-NEXT: # %bb.2:
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
; X64-X87-NEXT: .LBB10_1:
; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-X87-NEXT: setbe %al
; X64-X87-NEXT: shlq $63, %rax
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
;
Expand All @@ -515,17 +506,14 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
; X64-SSSE3-NEXT: fld %st(1)
; X64-SSSE3-NEXT: fsub %st(1)
; X64-SSSE3-NEXT: xorl %eax, %eax
; X64-SSSE3-NEXT: fxch %st(1)
; X64-SSSE3-NEXT: fucompi %st(2)
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0)
; X64-SSSE3-NEXT: fstp %st(1)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fld %st(1)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fucompi %st(1)
; X64-SSSE3-NEXT: fstp %st(0)
; X64-SSSE3-NEXT: jbe .LBB10_1
; X64-SSSE3-NEXT: # %bb.2:
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
; X64-SSSE3-NEXT: .LBB10_1:
; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-SSSE3-NEXT: setbe %al
; X64-SSSE3-NEXT: shlq $63, %rax
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
%1 = fptoui x86_fp80 %a0 to i64
Expand Down Expand Up @@ -577,29 +565,20 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X64-X87-NEXT: flds {{.*}}(%rip)
; X64-X87-NEXT: fld %st(1)
; X64-X87-NEXT: fsub %st(1)
; X64-X87-NEXT: xorl %eax, %eax
; X64-X87-NEXT: fxch %st(1)
; X64-X87-NEXT: fucompi %st(2)
; X64-X87-NEXT: fcmovnbe %st(1), %st(0)
; X64-X87-NEXT: fstp %st(1)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; X64-X87-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; X64-X87-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fld %st(1)
; X64-X87-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fistpll -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fldcw -{{[0-9]+}}(%rsp)
; X64-X87-NEXT: fucompi %st(1)
; X64-X87-NEXT: fstp %st(0)
; X64-X87-NEXT: jbe .LBB11_1
; X64-X87-NEXT: # %bb.2:
; X64-X87-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
; X64-X87-NEXT: .LBB11_1:
; X64-X87-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-X87-NEXT: setbe %al
; X64-X87-NEXT: shlq $63, %rax
; X64-X87-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-X87-NEXT: retq
;
Expand All @@ -609,17 +588,14 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
; X64-SSSE3-NEXT: flds {{.*}}(%rip)
; X64-SSSE3-NEXT: fld %st(1)
; X64-SSSE3-NEXT: fsub %st(1)
; X64-SSSE3-NEXT: xorl %eax, %eax
; X64-SSSE3-NEXT: fxch %st(1)
; X64-SSSE3-NEXT: fucompi %st(2)
; X64-SSSE3-NEXT: fcmovnbe %st(1), %st(0)
; X64-SSSE3-NEXT: fstp %st(1)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fld %st(1)
; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; X64-SSSE3-NEXT: fucompi %st(1)
; X64-SSSE3-NEXT: fstp %st(0)
; X64-SSSE3-NEXT: jbe .LBB11_1
; X64-SSSE3-NEXT: # %bb.2:
; X64-SSSE3-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
; X64-SSSE3-NEXT: .LBB11_1:
; X64-SSSE3-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; X64-SSSE3-NEXT: setbe %al
; X64-SSSE3-NEXT: shlq $63, %rax
; X64-SSSE3-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; X64-SSSE3-NEXT: retq
%1 = load x86_fp80, x86_fp80 *%a0
Expand Down
100 changes: 37 additions & 63 deletions llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
Expand Up @@ -1147,25 +1147,21 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
;
; SSE3_64_WIN-LABEL: x_to_u64:
; SSE3_64_WIN: # %bb.0:
; SSE3_64_WIN-NEXT: subq $16, %rsp
; SSE3_64_WIN-NEXT: pushq %rax
; SSE3_64_WIN-NEXT: fldt (%rcx)
; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip)
; SSE3_64_WIN-NEXT: fld %st(1)
; SSE3_64_WIN-NEXT: fsub %st(1)
; SSE3_64_WIN-NEXT: fisttpll {{[0-9]+}}(%rsp)
; SSE3_64_WIN-NEXT: fld %st(1)
; SSE3_64_WIN-NEXT: xorl %eax, %eax
; SSE3_64_WIN-NEXT: fxch %st(1)
; SSE3_64_WIN-NEXT: fucompi %st(2)
; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st(0)
; SSE3_64_WIN-NEXT: fstp %st(1)
; SSE3_64_WIN-NEXT: fisttpll (%rsp)
; SSE3_64_WIN-NEXT: fucompi %st(1)
; SSE3_64_WIN-NEXT: fstp %st(0)
; SSE3_64_WIN-NEXT: jbe .LBB4_1
; SSE3_64_WIN-NEXT: # %bb.2:
; SSE3_64_WIN-NEXT: movq (%rsp), %rax
; SSE3_64_WIN-NEXT: addq $16, %rsp
; SSE3_64_WIN-NEXT: retq
; SSE3_64_WIN-NEXT: .LBB4_1:
; SSE3_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE3_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax
; SSE3_64_WIN-NEXT: addq $16, %rsp
; SSE3_64_WIN-NEXT: setbe %al
; SSE3_64_WIN-NEXT: shlq $63, %rax
; SSE3_64_WIN-NEXT: xorq (%rsp), %rax
; SSE3_64_WIN-NEXT: popq %rcx
; SSE3_64_WIN-NEXT: retq
;
; SSE3_64_LIN-LABEL: x_to_u64:
Expand All @@ -1174,17 +1170,14 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
; SSE3_64_LIN-NEXT: flds {{.*}}(%rip)
; SSE3_64_LIN-NEXT: fld %st(1)
; SSE3_64_LIN-NEXT: fsub %st(1)
; SSE3_64_LIN-NEXT: xorl %eax, %eax
; SSE3_64_LIN-NEXT: fxch %st(1)
; SSE3_64_LIN-NEXT: fucompi %st(2)
; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st(0)
; SSE3_64_LIN-NEXT: fstp %st(1)
; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; SSE3_64_LIN-NEXT: fld %st(1)
; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp)
; SSE3_64_LIN-NEXT: fucompi %st(1)
; SSE3_64_LIN-NEXT: fstp %st(0)
; SSE3_64_LIN-NEXT: jbe .LBB4_1
; SSE3_64_LIN-NEXT: # %bb.2:
; SSE3_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; SSE3_64_LIN-NEXT: retq
; SSE3_64_LIN-NEXT: .LBB4_1:
; SSE3_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE3_64_LIN-NEXT: setbe %al
; SSE3_64_LIN-NEXT: shlq $63, %rax
; SSE3_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; SSE3_64_LIN-NEXT: retq
;
Expand Down Expand Up @@ -1246,37 +1239,27 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
;
; SSE2_64_WIN-LABEL: x_to_u64:
; SSE2_64_WIN: # %bb.0:
; SSE2_64_WIN-NEXT: subq $24, %rsp
; SSE2_64_WIN-NEXT: subq $16, %rsp
; SSE2_64_WIN-NEXT: fldt (%rcx)
; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip)
; SSE2_64_WIN-NEXT: fld %st(1)
; SSE2_64_WIN-NEXT: fsub %st(1)
; SSE2_64_WIN-NEXT: xorl %eax, %eax
; SSE2_64_WIN-NEXT: fxch %st(1)
; SSE2_64_WIN-NEXT: fucompi %st(2)
; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st(0)
; SSE2_64_WIN-NEXT: fstp %st(1)
; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %ecx
; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: movw %cx, {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: movzwl {{[0-9]+}}(%rsp), %eax
; SSE2_64_WIN-NEXT: movw $3199, {{[0-9]+}}(%rsp) # imm = 0xC7F
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: movw %ax, {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: fld %st(1)
; SSE2_64_WIN-NEXT: fistpll {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: fldcw {{[0-9]+}}(%rsp)
; SSE2_64_WIN-NEXT: fucompi %st(1)
; SSE2_64_WIN-NEXT: fstp %st(0)
; SSE2_64_WIN-NEXT: jbe .LBB4_1
; SSE2_64_WIN-NEXT: # %bb.2:
; SSE2_64_WIN-NEXT: movq {{[0-9]+}}(%rsp), %rax
; SSE2_64_WIN-NEXT: addq $24, %rsp
; SSE2_64_WIN-NEXT: retq
; SSE2_64_WIN-NEXT: .LBB4_1:
; SSE2_64_WIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2_64_WIN-NEXT: setbe %al
; SSE2_64_WIN-NEXT: shlq $63, %rax
; SSE2_64_WIN-NEXT: xorq {{[0-9]+}}(%rsp), %rax
; SSE2_64_WIN-NEXT: addq $24, %rsp
; SSE2_64_WIN-NEXT: addq $16, %rsp
; SSE2_64_WIN-NEXT: retq
;
; SSE2_64_LIN-LABEL: x_to_u64:
Expand All @@ -1285,29 +1268,20 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
; SSE2_64_LIN-NEXT: flds {{.*}}(%rip)
; SSE2_64_LIN-NEXT: fld %st(1)
; SSE2_64_LIN-NEXT: fsub %st(1)
; SSE2_64_LIN-NEXT: xorl %eax, %eax
; SSE2_64_LIN-NEXT: fxch %st(1)
; SSE2_64_LIN-NEXT: fucompi %st(2)
; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st(0)
; SSE2_64_LIN-NEXT: fstp %st(1)
; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx
; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: movw %cx, -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: fnstcw -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax
; SSE2_64_LIN-NEXT: movw $3199, -{{[0-9]+}}(%rsp) # imm = 0xC7F
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: fld %st(1)
; SSE2_64_LIN-NEXT: fistpll -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: fldcw -{{[0-9]+}}(%rsp)
; SSE2_64_LIN-NEXT: fucompi %st(1)
; SSE2_64_LIN-NEXT: fstp %st(0)
; SSE2_64_LIN-NEXT: jbe .LBB4_1
; SSE2_64_LIN-NEXT: # %bb.2:
; SSE2_64_LIN-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; SSE2_64_LIN-NEXT: retq
; SSE2_64_LIN-NEXT: .LBB4_1:
; SSE2_64_LIN-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE2_64_LIN-NEXT: setbe %al
; SSE2_64_LIN-NEXT: shlq $63, %rax
; SSE2_64_LIN-NEXT: xorq -{{[0-9]+}}(%rsp), %rax
; SSE2_64_LIN-NEXT: retq
;
Expand Down

0 comments on commit 0add090

Please sign in to comment.