Skip to content

[AMDGPU] Use FPImmLeaf for float constants, fix build_vector patterns#178018

Merged
mbrkusanin merged 3 commits into llvm:main from
mbrkusanin:use-fpimmleaf
Jan 28, 2026
Merged

[AMDGPU] Use FPImmLeaf for float constants, fix build_vector patterns#178018
mbrkusanin merged 3 commits into llvm:main from
mbrkusanin:use-fpimmleaf

Conversation

@mbrkusanin
Copy link
Copy Markdown
Collaborator

No description provided.

@llvmbot
Copy link
Copy Markdown
Member

llvmbot commented Jan 26, 2026

@llvm/pr-subscribers-backend-amdgpu

Author: Mirko Brkušanin (mbrkusanin)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/178018.diff

3 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructions.td (+5-19)
  • (modified) llvm/lib/Target/AMDGPU/R600Instructions.td (+7-7)
  • (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+2-2)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 2d649c2b7c5eb..37904872d775c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -746,23 +746,9 @@ int FP64_NEG_ONE = 0xbff0000000000000;
 }
 def CONST : Constants;
 
-def FP_ZERO : PatLeaf <
-  (fpimm),
-  [{return N->getValueAPF().isZero();}]
->;
-
-def FP_ONE : PatLeaf <
-  (fpimm),
-  [{return N->isExactlyValue(1.0);}]
->;
-
-def FP_HALF : PatLeaf <
-  (fpimm),
-  [{return N->isExactlyValue(0.5);}]> {
-  let GISelPredicateCode = [{
-    return MI.getOperand(1).getFPImm()->isExactlyValue(0.5);
-  }];
-}
+def fpimm_zero : FPImmLeaf<fAny, [{ return Imm.isZero(); }]> ;
+def fpimm_one : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(+1.0); }]> ;
+def fpimm_half : FPImmLeaf<fAny, [{ return Imm.isExactlyValue(+0.5); }]> ;
 
 /* Generic helper patterns for intrinsics */
 /* -------------------------------------- */
@@ -812,7 +798,7 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
 let GIIgnoreCopies = 1 in
 def cvt_rpi_i32_f32 : PatFrag<
   (ops node:$src),
-  (fp_to_sint (ffloor_nnan (fadd $src, FP_HALF)))
+  (fp_to_sint (ffloor_nnan (fadd $src, fpimm_half)))
 >, GISelFlags;
 
 def cvt_flr_i32_f32 : PatFrag<
@@ -835,7 +821,7 @@ class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
 } // AddedComplexity.
 
 class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
-  (fdiv FP_ONE, vt:$src),
+  (fdiv fpimm_one, vt:$src),
   (RcpInst $src)
 >;
 
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index dda0cf6a32182..6d7cc8b9cd563 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -736,22 +736,22 @@ def MIN_DX10 : R600_2OP_Helper <0x6, "MIN_DX10", fminnum>;
 // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
 def SETE : R600_2OP <
   0x08, "SETE",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, fpimm_one, fpimm_zero, COND_OEQ))]
 >;
 
 def SGT : R600_2OP <
   0x09, "SETGT",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, fpimm_one, fpimm_zero, COND_OGT))]
 >;
 
 def SGE : R600_2OP <
   0xA, "SETGE",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, fpimm_one, fpimm_zero, COND_OGE))]
 >;
 
 def SNE : R600_2OP <
   0xB, "SETNE",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE_NE))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, fpimm_one, fpimm_zero, COND_UNE_NE))]
 >;
 
 def SETE_DX10 : R600_2OP <
@@ -1004,19 +1004,19 @@ class FMA_Common <bits<5> inst> : R600_3OP <
 
 class CNDE_Common <bits<5> inst> : R600_3OP <
   inst, "CNDE",
-  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
+  [(set f32:$dst, (selectcc f32:$src0, fpimm_zero, f32:$src1, f32:$src2, COND_OEQ))]
 >;
 
 class CNDGT_Common <bits<5> inst> : R600_3OP <
   inst, "CNDGT",
-  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
+  [(set f32:$dst, (selectcc f32:$src0, fpimm_zero, f32:$src1, f32:$src2, COND_OGT))]
 > {
   let Itinerary = VecALU;
 }
 
 class CNDGE_Common <bits<5> inst> : R600_3OP <
   inst, "CNDGE",
-  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
+  [(set f32:$dst, (selectcc f32:$src0, fpimm_zero, f32:$src1, f32:$src2, COND_OGE))]
 > {
   let Itinerary = VecALU;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index e06bc912113a8..5e3b4c66afd6e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3776,7 +3776,7 @@ def : GCNPat <
 >;
 
 def : GCNPat <
-  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
+  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 fpimm_zero))),
   (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
 >;
 }
@@ -3787,7 +3787,7 @@ def : GCNPat <
 >;
 
 def : GCNPat <
-  (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 FP_ZERO))),
+  (v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src1), (f16 fpimm_zero))),
   (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1)
 >;
 

@@ -3776,7 +3776,7 @@ def : GCNPat <
>;

def : GCNPat <
-  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
+  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 fpimm_zero))),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These cases actually had, and still have, a bug. They require the constant to be a positive zero, but the matcher improperly selects either +0 or -0.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed to fpimm_pos_zero

return MI.getOperand(1).getFPImm()->isExactlyValue(0.5);
}];
}
def fpimm_zero : FPImmLeaf<fAny, [{ return Imm.isZero(); }]> ;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
-def fpimm_zero : FPImmLeaf<fAny, [{ return Imm.isZero(); }]> ;
+def fpimm_zero : FPImmLeaf<fAny, [{ return Imm.isZero() && !Imm.isNegative(); }]> ;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use isPosZero?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is already

def fpimm_pos_zero : FPImmLeaf<fAny, [{
  return Imm.isZero() && !Imm.isNegative();
}]>;

in llvm/lib/Target/AMDGPU/SIInstrInfo.td

@@ -3776,7 +3776,7 @@ def : GCNPat <
>;

def : GCNPat <
-  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 FP_ZERO))),
+  (v2f16 (DivergentBinFrag<build_vector> (f16 VGPR_32:$src1), (f16 fpimm_zero))),
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add some tests for these patterns with a -0?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added

@mbrkusanin mbrkusanin changed the title from "[AMDGPU] Use FPImmLeaf for float constants in tablegen. NFCI." to "[AMDGPU] Use FPImmLeaf for float constants, fix build_vector patterns" Jan 27, 2026
@mbrkusanin mbrkusanin merged commit 9d800ad into llvm:main Jan 28, 2026
11 checks passed
honeygoyal pushed a commit to honeygoyal/llvm-project that referenced this pull request Jan 30, 2026
@mbrkusanin mbrkusanin deleted the use-fpimmleaf branch April 21, 2026 13:59
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants