-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86][GlobalISel] Support G_SELECT for x86_fp80 #157377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
G_SELECT for x86_fp80 is required to implement G_FPTOUI and G_UITOFP. Legalize the predicate operand first to prevent clamping of fp80 type. Since every G_SELECT initially uses s8 for the predicate type, we were always clamping the input. To maintain this behavior, we add additional conditions for legal types, such as Is64Bit and HasCMOV.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-globalisel Author: Evgenii Kudriashov (e-kud) ChangesG_SELECT for x86_fp80 is required to implement G_FPTOUI and G_UITOFP. Legalize the predicate operand first to prevent clamping of fp80 type. Since every G_SELECT initially uses s8 for the predicate type, we were always clamping the input. To maintain this behavior, we add additional conditions for legal types, such as Is64Bit and HasCMOV. Full diff: https://github.com/llvm/llvm-project/pull/157377.diff 6 Files Affected:
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 3090ad313b90d..7561a7df5c138 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -1877,28 +1877,34 @@ bool X86InstructionSelector::selectSelect(MachineInstr &I,
unsigned OpCmp;
LLT Ty = MRI.getType(DstReg);
- switch (Ty.getSizeInBits()) {
- default:
- return false;
- case 8:
- OpCmp = X86::CMOV_GR8;
- break;
- case 16:
- OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
- break;
- case 32:
- OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
- break;
- case 64:
- assert(STI.is64Bit() && STI.canUseCMOV());
- OpCmp = X86::CMOV64rr;
- break;
+ if (Ty.getSizeInBits() == 80) {
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::CMOVE_Fp80),
+ DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg());
+ } else {
+ switch (Ty.getSizeInBits()) {
+ default:
+ return false;
+ case 8:
+ OpCmp = X86::CMOV_GR8;
+ break;
+ case 16:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
+ break;
+ case 32:
+ OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
+ break;
+ case 64:
+ assert(STI.is64Bit() && STI.canUseCMOV());
+ OpCmp = X86::CMOV64rr;
+ break;
+ }
+ BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
+ .addReg(Sel.getTrueReg())
+ .addReg(Sel.getFalseReg())
+ .addImm(X86::COND_E);
}
- BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
- .addReg(Sel.getTrueReg())
- .addReg(Sel.getFalseReg())
- .addImm(X86::COND_E);
-
const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
diff --git a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
index 7fe58539cd4ec..b01aed448ab22 100644
--- a/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/GISel/X86LegalizerInfo.cpp
@@ -568,10 +568,13 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
// todo: vectors and address spaces
getActionDefinitionsBuilder(G_SELECT)
- .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}})
+ .legalFor({{s16, s32}, {s32, s32}, {p0, s32}})
+ .legalFor(!HasCMOV, {{s8, s32}})
+ .legalFor(Is64Bit, {{s64, s32}})
+ .legalFor(UseX87, {{s80, s32}})
+ .clampScalar(1, s32, s32)
.widenScalarToNextPow2(0, /*Min=*/8)
- .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar)
- .clampScalar(1, s32, s32);
+ .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar);
// memory intrinsics
getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
index 19fe5b84c73ce..85f7c77cbdb60 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
@@ -25,6 +25,7 @@ body: |
; X64-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]]
; X64-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[SUB]], [[C]]
; X64-NEXT: RET 0, implicit [[AND1]](s64)
+ ;
; X86-LABEL: name: test_ctlz35
; X86: [[COPY:%[0-9]+]]:_(s64) = COPY $rdx
; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s35) = G_TRUNC [[COPY]](s64)
@@ -36,9 +37,9 @@ body: |
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+ ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
- ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]]
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]]
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -97,6 +98,7 @@ body: |
; X64-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[DEF]](s64)
; X64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[CTLZ]](s64)
; X64-NEXT: RET 0, implicit [[COPY]](s64)
+ ;
; X86-LABEL: name: test_ctlz64
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
@@ -106,9 +108,9 @@ body: |
; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+ ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
- ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]]
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]]
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
index a7cbb35e3f74c..6ab424eeaa780 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir
@@ -33,9 +33,9 @@ body: |
; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF
; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF
; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
+ ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64)
; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64)
- ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]]
; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]]
; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -115,9 +115,9 @@ body: |
; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF
; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF
; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF
+ ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8)
; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8)
- ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1)
; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]]
; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16)
; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
diff --git a/llvm/test/CodeGen/X86/fcmove.ll b/llvm/test/CodeGen/X86/fcmove.ll
deleted file mode 100644
index 6bb014858d048..0000000000000
--- a/llvm/test/CodeGen/X86/fcmove.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-unknown"
-
-; Test that we can generate an fcmove, and also that it passes verification.
-
-; CHECK-LABEL: cmove_f
-; CHECK: fcmove %st({{[0-7]}}), %st
-define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
- %test = icmp eq i32 %c, 0
- %add = fadd x86_fp80 %a, %b
- %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
- ret x86_fp80 %ret
-}
diff --git a/llvm/test/CodeGen/X86/isel-select-fcmov.ll b/llvm/test/CodeGen/X86/isel-select-fcmov.ll
new file mode 100644
index 0000000000000..f50abf4e49ea1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-select-fcmov.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -global-isel -global-isel-abort=1 -verify-machineinstrs | FileCheck %s --check-prefix=GISEL
+; RUN: llc < %s -fast-isel=0 -global-isel=0 -verify-machineinstrs | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Test that we can generate an fcmove, and also that it passes verification.
+
+define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) {
+; CHECK-LABEL: cmove_f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: fadd %st(1), %st
+; CHECK-NEXT: fxch %st(1)
+; CHECK-NEXT: fcmove %st(1), %st
+; CHECK-NEXT: fstp %st(1)
+; CHECK-NEXT: retq
+;
+; GISEL-LABEL: cmove_f:
+; GISEL: # %bb.0:
+; GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL-NEXT: xorl %eax, %eax
+; GISEL-NEXT: cmpl $0, %edi
+; GISEL-NEXT: sete %al
+; GISEL-NEXT: fadd %st, %st(1)
+; GISEL-NEXT: andl $1, %eax
+; GISEL-NEXT: testl %eax, %eax
+; GISEL-NEXT: fxch %st(1)
+; GISEL-NEXT: fcmove %st(1), %st
+; GISEL-NEXT: fstp %st(1)
+; GISEL-NEXT: retq
+ %test = icmp eq i32 %c, 0
+ %add = fadd x86_fp80 %a, %b
+ %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b
+ ret x86_fp80 %ret
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
G_SELECT for x86_fp80 is required to implement G_FPTOUI and G_UITOFP.
Legalize the predicate operand first to prevent clamping of fp80 type. Since every G_SELECT initially uses s8 for the predicate type, we were always clamping the input. To keep this behavior, we add additional conditions for legal types, such as Is64Bit and HasCMOV.