Skip to content

Conversation

@Wolfram70
Copy link
Contributor

This change adds intrinsics and clang builtins for the remaining float to fp16 conversions. This includes the following conversions:

  • float to bf16x2 - satfinite variants
  • float to f16x2 - satfinite variants
  • float to bf16 - satfinite variants
  • float to f16 - all variants

Tests are added in convert-sm80.ll and convert-sm80-sf.ll for the intrinsics and in builtins-nvptx.c for the clang builtins.

@Wolfram70 Wolfram70 self-assigned this Nov 12, 2025
@llvmbot llvmbot added clang Clang issues not falling into any other category clang:frontend Language frontend issues, e.g. anything involving "Sema" backend:NVPTX llvm:ir labels Nov 12, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 12, 2025

@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-backend-nvptx

Author: Srinivasa Ravi (Wolfram70)

Changes

This change adds intrinsics and clang builtins for the remaining float to fp16 conversions. This includes the following conversions:

  • float to bf16x2 - satfinite variants
  • float to f16x2 - satfinite variants
  • float to bf16 - satfinite variants
  • float to f16 - all variants

Tests are added in convert-sm80.ll and convert-sm80-sf.ll for the intrinsics and in builtins-nvptx.c for the clang builtins.


Patch is 27.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167641.diff

7 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsNVPTX.td (+21)
  • (modified) clang/test/CodeGen/builtins-nvptx.c (+52)
  • (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+13-8)
  • (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+14)
  • (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+29-1)
  • (added) llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll (+263)
  • (modified) llvm/test/CodeGen/NVPTX/convert-sm80.ll (+65)
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index d923d2a90e908..f221bac4dc728 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -579,6 +579,10 @@ def __nvvm_ff2bf16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)
 def __nvvm_ff2bf16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2bf16x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
 def __nvvm_ff2bf16x2_rs :
   NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)",
                        SM<"100a", [SM_103a]>, PTX87>;
@@ -596,6 +600,10 @@ def __nvvm_ff2f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)"
 def __nvvm_ff2f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2f16x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
 def __nvvm_ff2f16x2_rs :
   NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)",
                        SM<"100a", [SM_103a]>, PTX87>;
@@ -613,6 +621,19 @@ def __nvvm_f2bf16_rn : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rn_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rz : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rz_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
+def __nvvm_f2bf16_rn_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rz_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+
+def __nvvm_f2f16_rn : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rn_relu : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rz : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rz_relu : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rn_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rz_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
 
 def __nvvm_f2tf32_rna : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX70>;
 def __nvvm_f2tf32_rna_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_89, PTX81>;
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index e3be262622844..0777405b0d3b0 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -28,6 +28,9 @@
 // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_89 -target-feature +ptx81 -DPTX=81\
 // RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
 // RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM89 %s
+// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_80 -target-feature +ptx81 -DPTX=81 \
+// RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM80 %s
 // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_90 -target-feature +ptx78 -DPTX=78 \
 // RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
 // RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX78_SM90 %s
@@ -1004,6 +1007,16 @@ __device__ void nvvm_cvt_sm80() {
   __nvvm_ff2bf16x2_rz(1, 1);
   // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float 1.000000e+00, float 1.000000e+00)
   __nvvm_ff2bf16x2_rz_relu(1, 1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rn_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rn_relu_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rz_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rz_relu_satfinite(1, 1);
+  #endif
 
   // CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn(float 1.000000e+00, float 1.000000e+00)
   __nvvm_ff2f16x2_rn(1, 1);
@@ -1013,6 +1026,16 @@ __device__ void nvvm_cvt_sm80() {
   __nvvm_ff2f16x2_rz(1, 1);
   // CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float 1.000000e+00, float 1.000000e+00)
   __nvvm_ff2f16x2_rz_relu(1, 1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rn_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rn_relu_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rz_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rz_relu_satfinite(1, 1);
+  #endif
 
   // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.f2bf16.rn(float 1.000000e+00)
   __nvvm_f2bf16_rn(1);
@@ -1022,6 +1045,35 @@ __device__ void nvvm_cvt_sm80() {
   __nvvm_f2bf16_rz(1);
   // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.f2bf16.rz.relu(float 1.000000e+00)
   __nvvm_f2bf16_rz_relu(1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rn.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rn_satfinite(1);
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rn.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rn_relu_satfinite(1);
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rz.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rz_satfinite(1);
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rz.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rz_relu_satfinite(1);
+  #endif
+
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rn(float 1.000000e+00)
+  __nvvm_f2f16_rn(1);
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rn.relu(float 1.000000e+00)
+  __nvvm_f2f16_rn_relu(1);
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rz(float 1.000000e+00)
+  __nvvm_f2f16_rz(1);
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rz.relu(float 1.000000e+00)
+  __nvvm_f2f16_rz_relu(1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rn.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rn_satfinite(1);
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rn.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rn_relu_satfinite(1);
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rz.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rz_satfinite(1);
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rz.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rz_relu_satfinite(1);
+  #endif
 
   // CHECK_PTX70_SM80: call i32 @llvm.nvvm.f2tf32.rna(float 1.000000e+00)
   __nvvm_f2tf32_rna(1);
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 719181a09f475..f7bea61615251 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1573,14 +1573,19 @@ let TargetPrefix = "nvvm" in {
 
   foreach rnd = ["rn", "rz"] in {
     foreach relu = ["", "_relu"] in {
-      def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin,
-          PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
-
-      def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin,
-          PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
-
-      def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin,
-          PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
+      foreach satfinite = ["", "_satfinite"] in {
+        def int_nvvm_ff2bf16x2_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
+
+        def int_nvvm_ff2f16x2_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
+
+        def int_nvvm_f2bf16_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
+            
+        def int_nvvm_f2f16_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_half_ty], [llvm_float_ty]>;
+      }
     }
   }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index dfde0cca0f00c..47693c026f9e9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -596,6 +596,15 @@ let hasSideEffects = false in {
   defm CVT_bf16 : CVT_FROM_ALL<"bf16", B16, [hasPTX<78>, hasSM<90>]>;
   defm CVT_f32 : CVT_FROM_ALL<"f32", B32>;
   defm CVT_f64 : CVT_FROM_ALL<"f64", B64>;
+  
+  multiclass CVT_FROM_FLOAT_SATFINITE<string ToName, RegisterClass RC> {
+    def _f32_sf :
+      BasicFlagsNVPTXInst<(outs RC:$dst),
+                (ins B32:$src), (ins CvtMode:$mode),
+                "cvt${mode:base}${mode:relu}.satfinite." # ToName # ".f32">;
+  }
+  defm CVT_bf16 : CVT_FROM_FLOAT_SATFINITE<"bf16", B16>;
+  defm CVT_f16 : CVT_FROM_FLOAT_SATFINITE<"f16", B16>;
 
   // These cvts are different from those above: The source and dest registers
   // are of the same type.
@@ -612,6 +621,11 @@ let hasSideEffects = false in {
                 (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
                 "cvt${mode:base}${mode:relu}." # FromName # ".f32">,
     Requires<[hasPTX<70>, hasSM<80>]>;
+    
+    def _f32_sf :
+      BasicFlagsNVPTXInst<(outs RC:$dst),
+                (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
+                "cvt${mode:base}${mode:relu}.satfinite." # FromName # ".f32">;
   }
 
   defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", B32>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c923f0ec907e7..94c9aebafb473 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1936,7 +1936,12 @@ def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b),      (CVT_bf16x2_f32 $a, $b, C
 def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
 def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b),      (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
 def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;
-
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_ff2bf16x2_rn_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRN)>;
+  def : Pat<(int_nvvm_ff2bf16x2_rn_relu_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRN_RELU)>;
+  def : Pat<(int_nvvm_ff2bf16x2_rz_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRZ)>;
+  def : Pat<(int_nvvm_ff2bf16x2_rz_relu_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRZ_RELU)>;
+}
 let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
 def : Pat<(int_nvvm_ff2bf16x2_rs f32:$a, f32:$b, i32:$c),
           (CVT_bf16x2_f32_rs $a, $b, $c, CvtRS)>;
@@ -1952,6 +1957,12 @@ def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b),      (CVT_f16x2_f32 $a, $b, Cvt
 def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
 def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b),      (CVT_f16x2_f32 $a, $b, CvtRZ)>;
 def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_ff2f16x2_rn_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRN)>;
+  def : Pat<(int_nvvm_ff2f16x2_rn_relu_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRN_RELU)>;
+  def : Pat<(int_nvvm_ff2f16x2_rz_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRZ)>;
+  def : Pat<(int_nvvm_ff2f16x2_rz_relu_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRZ_RELU)>;
+}
 
 let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
 def : Pat<(int_nvvm_ff2f16x2_rs f32:$a, f32:$b, i32:$c),
@@ -1967,6 +1978,23 @@ def : Pat<(int_nvvm_f2bf16_rn f32:$a),      (CVT_bf16_f32 $a, CvtRN)>;
 def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a), (CVT_bf16_f32 $a, CvtRN_RELU)>;
 def : Pat<(int_nvvm_f2bf16_rz f32:$a),      (CVT_bf16_f32 $a, CvtRZ)>;
 def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a), (CVT_bf16_f32 $a, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_f2bf16_rz_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRZ)>;
+  def : Pat<(int_nvvm_f2bf16_rz_relu_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRZ_RELU)>;
+  def : Pat<(int_nvvm_f2bf16_rn_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRN)>;
+  def : Pat<(int_nvvm_f2bf16_rn_relu_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRN_RELU)>;
+}
+
+def : Pat<(int_nvvm_f2f16_rn f32:$a),      (CVT_f16_f32 $a, CvtRN)>;
+def : Pat<(int_nvvm_f2f16_rn_relu f32:$a), (CVT_f16_f32 $a, CvtRN_RELU)>;
+def : Pat<(int_nvvm_f2f16_rz f32:$a),      (CVT_f16_f32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_f2f16_rz_relu f32:$a), (CVT_f16_f32 $a, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_f2f16_rz_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRZ)>;
+  def : Pat<(int_nvvm_f2f16_rz_relu_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRZ_RELU)>;
+  def : Pat<(int_nvvm_f2f16_rn_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRN)>;
+  def : Pat<(int_nvvm_f2f16_rn_relu_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRN_RELU)>;
+}
 
 def : Pat<(int_nvvm_lohi_i2d i32:$a, i32:$b), (V2I32toI64 $a, $b)>;
 def : Pat<(int_nvvm_d2i_lo f64:$a), (I64toI32L $a)>;
diff --git a/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll
new file mode 100644
index 0000000000000..20d1acabe8f53
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll
@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | FileCheck %s
+; RUN: %if ptxas-sm_80 && ptxas-isa-8.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | %ptxas-verify -arch=sm_80 %}
+
+define <2 x bfloat> @cvt_rn_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rn_relu_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_relu_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_relu_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_relu_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.relu.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rz_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rz_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rz.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rz_relu_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_relu_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_relu_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rz_relu_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rz.relu.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float, float)
+
+define <2 x half> @cvt_rn_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_f16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_f16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float %f1, float %f2)
+  ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rn_relu_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_relu_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_relu_f16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_relu_f16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.relu.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float %f1, float %f2)
+  ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rz_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_f16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rz_f16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rz.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float %f1, float %f2)
+  ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rz_relu_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_relu_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_relu_f16x2_f32_sf_param_0...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Nov 12, 2025

@llvm/pr-subscribers-clang

Author: Srinivasa Ravi (Wolfram70)

Changes

This change adds intrinsics and clang builtins for the remaining float to fp16 conversions. This includes the following conversions:

  • float to bf16x2 - satfinite variants
  • float to f16x2 - satfinite variants
  • float to bf16 - satfinite variants
  • float to f16 - all variants

Tests are added in convert-sm80.ll and convert-sm80-sf.ll for the intrinsics and in builtins-nvptx.c for the clang builtins.


Patch is 27.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/167641.diff

7 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsNVPTX.td (+21)
  • (modified) clang/test/CodeGen/builtins-nvptx.c (+52)
  • (modified) llvm/include/llvm/IR/IntrinsicsNVVM.td (+13-8)
  • (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+14)
  • (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+29-1)
  • (added) llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll (+263)
  • (modified) llvm/test/CodeGen/NVPTX/convert-sm80.ll (+65)
diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.td b/clang/include/clang/Basic/BuiltinsNVPTX.td
index d923d2a90e908..f221bac4dc728 100644
--- a/clang/include/clang/Basic/BuiltinsNVPTX.td
+++ b/clang/include/clang/Basic/BuiltinsNVPTX.td
@@ -579,6 +579,10 @@ def __nvvm_ff2bf16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)
 def __nvvm_ff2bf16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2bf16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2bf16x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2bf16x2_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float)", SM_80, PTX81>;
 def __nvvm_ff2bf16x2_rs :
   NVPTXBuiltinSMAndPTX<"_Vector<2, __bf16>(float, float, uint32_t)",
                        SM<"100a", [SM_103a]>, PTX87>;
@@ -596,6 +600,10 @@ def __nvvm_ff2f16x2_rn : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)"
 def __nvvm_ff2f16x2_rn_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
 def __nvvm_ff2f16x2_rz_relu : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX70>;
+def __nvvm_ff2f16x2_rn_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rz_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
+def __nvvm_ff2f16x2_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float)", SM_80, PTX81>;
 def __nvvm_ff2f16x2_rs :
   NVPTXBuiltinSMAndPTX<"_Vector<2, __fp16>(float, float, uint32_t)",
                        SM<"100a", [SM_103a]>, PTX87>;
@@ -613,6 +621,19 @@ def __nvvm_f2bf16_rn : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rn_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rz : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
 def __nvvm_f2bf16_rz_relu : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX70>;
+def __nvvm_f2bf16_rn_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rz_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+def __nvvm_f2bf16_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"__bf16(float)", SM_80, PTX81>;
+
+def __nvvm_f2f16_rn : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rn_relu : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rz : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rz_relu : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX70>;
+def __nvvm_f2f16_rn_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rn_relu_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rz_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
+def __nvvm_f2f16_rz_relu_satfinite : NVPTXBuiltinSMAndPTX<"__fp16(float)", SM_80, PTX81>;
 
 def __nvvm_f2tf32_rna : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_80, PTX70>;
 def __nvvm_f2tf32_rna_satfinite : NVPTXBuiltinSMAndPTX<"int32_t(float)", SM_89, PTX81>;
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index e3be262622844..0777405b0d3b0 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -28,6 +28,9 @@
 // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_89 -target-feature +ptx81 -DPTX=81\
 // RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
 // RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM89 %s
+// RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_80 -target-feature +ptx81 -DPTX=81 \
+// RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX81_SM80 %s
 // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown -target-cpu sm_90 -target-feature +ptx78 -DPTX=78 \
 // RUN:            -disable-llvm-optzns -fcuda-is-device -emit-llvm -o - -x cuda %s \
 // RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK_PTX78_SM90 %s
@@ -1004,6 +1007,16 @@ __device__ void nvvm_cvt_sm80() {
   __nvvm_ff2bf16x2_rz(1, 1);
   // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu(float 1.000000e+00, float 1.000000e+00)
   __nvvm_ff2bf16x2_rz_relu(1, 1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rn_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rn_relu_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rz_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2bf16x2_rz_relu_satfinite(1, 1);
+  #endif
 
   // CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn(float 1.000000e+00, float 1.000000e+00)
   __nvvm_ff2f16x2_rn(1, 1);
@@ -1013,6 +1026,16 @@ __device__ void nvvm_cvt_sm80() {
   __nvvm_ff2f16x2_rz(1, 1);
   // CHECK_PTX70_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu(float 1.000000e+00, float 1.000000e+00)
   __nvvm_ff2f16x2_rz_relu(1, 1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rn_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rn_relu_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rz_satfinite(1, 1);
+  // CHECK_PTX81_SM80: call <2 x half> @llvm.nvvm.ff2f16x2.rz.relu.satfinite(float 1.000000e+00, float 1.000000e+00)
+  __nvvm_ff2f16x2_rz_relu_satfinite(1, 1);
+  #endif
 
   // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.f2bf16.rn(float 1.000000e+00)
   __nvvm_f2bf16_rn(1);
@@ -1022,6 +1045,35 @@ __device__ void nvvm_cvt_sm80() {
   __nvvm_f2bf16_rz(1);
   // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.f2bf16.rz.relu(float 1.000000e+00)
   __nvvm_f2bf16_rz_relu(1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rn.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rn_satfinite(1);
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rn.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rn_relu_satfinite(1);
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rz.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rz_satfinite(1);
+  // CHECK_PTX81_SM80: call bfloat @llvm.nvvm.f2bf16.rz.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2bf16_rz_relu_satfinite(1);
+  #endif
+
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rn(float 1.000000e+00)
+  __nvvm_f2f16_rn(1);
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rn.relu(float 1.000000e+00)
+  __nvvm_f2f16_rn_relu(1);
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rz(float 1.000000e+00)
+  __nvvm_f2f16_rz(1);
+  // CHECK_PTX70_SM80: call half @llvm.nvvm.f2f16.rz.relu(float 1.000000e+00)
+  __nvvm_f2f16_rz_relu(1);
+  #if PTX >= 81
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rn.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rn_satfinite(1);
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rn.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rn_relu_satfinite(1);
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rz.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rz_satfinite(1);
+  // CHECK_PTX81_SM80: call half @llvm.nvvm.f2f16.rz.relu.satfinite(float 1.000000e+00)
+  __nvvm_f2f16_rz_relu_satfinite(1);
+  #endif
 
   // CHECK_PTX70_SM80: call i32 @llvm.nvvm.f2tf32.rna(float 1.000000e+00)
   __nvvm_f2tf32_rna(1);
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 719181a09f475..f7bea61615251 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1573,14 +1573,19 @@ let TargetPrefix = "nvvm" in {
 
   foreach rnd = ["rn", "rz"] in {
     foreach relu = ["", "_relu"] in {
-      def int_nvvm_ff2bf16x2_ # rnd # relu : NVVMBuiltin,
-          PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
-
-      def int_nvvm_ff2f16x2_ # rnd # relu : NVVMBuiltin,
-          PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
-
-      def int_nvvm_f2bf16_ # rnd # relu : NVVMBuiltin,
-          PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
+      foreach satfinite = ["", "_satfinite"] in {
+        def int_nvvm_ff2bf16x2_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty]>;
+
+        def int_nvvm_ff2f16x2_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty]>;
+
+        def int_nvvm_f2bf16_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_bfloat_ty], [llvm_float_ty]>;
+            
+        def int_nvvm_f2f16_ # rnd # relu # satfinite : NVVMBuiltin,
+            PureIntrinsic<[llvm_half_ty], [llvm_float_ty]>;
+      }
     }
   }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index dfde0cca0f00c..47693c026f9e9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -596,6 +596,15 @@ let hasSideEffects = false in {
   defm CVT_bf16 : CVT_FROM_ALL<"bf16", B16, [hasPTX<78>, hasSM<90>]>;
   defm CVT_f32 : CVT_FROM_ALL<"f32", B32>;
   defm CVT_f64 : CVT_FROM_ALL<"f64", B64>;
+  
+  multiclass CVT_FROM_FLOAT_SATFINITE<string ToName, RegisterClass RC> {
+    def _f32_sf :
+      BasicFlagsNVPTXInst<(outs RC:$dst),
+                (ins B32:$src), (ins CvtMode:$mode),
+                "cvt${mode:base}${mode:relu}.satfinite." # ToName # ".f32">;
+  }
+  defm CVT_bf16 : CVT_FROM_FLOAT_SATFINITE<"bf16", B16>;
+  defm CVT_f16 : CVT_FROM_FLOAT_SATFINITE<"f16", B16>;
 
   // These cvts are different from those above: The source and dest registers
   // are of the same type.
@@ -612,6 +621,11 @@ let hasSideEffects = false in {
                 (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
                 "cvt${mode:base}${mode:relu}." # FromName # ".f32">,
     Requires<[hasPTX<70>, hasSM<80>]>;
+    
+    def _f32_sf :
+      BasicFlagsNVPTXInst<(outs RC:$dst),
+                (ins B32:$src1, B32:$src2), (ins CvtMode:$mode),
+                "cvt${mode:base}${mode:relu}.satfinite." # FromName # ".f32">;
   }
 
   defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", B32>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index c923f0ec907e7..94c9aebafb473 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1936,7 +1936,12 @@ def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b),      (CVT_bf16x2_f32 $a, $b, C
 def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
 def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b),      (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
 def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b), (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;
-
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_ff2bf16x2_rn_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRN)>;
+  def : Pat<(int_nvvm_ff2bf16x2_rn_relu_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRN_RELU)>;
+  def : Pat<(int_nvvm_ff2bf16x2_rz_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRZ)>;
+  def : Pat<(int_nvvm_ff2bf16x2_rz_relu_satfinite f32:$a, f32:$b), (CVT_bf16x2_f32_sf $a, $b, CvtRZ_RELU)>;
+}
 let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
 def : Pat<(int_nvvm_ff2bf16x2_rs f32:$a, f32:$b, i32:$c),
           (CVT_bf16x2_f32_rs $a, $b, $c, CvtRS)>;
@@ -1952,6 +1957,12 @@ def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b),      (CVT_f16x2_f32 $a, $b, Cvt
 def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
 def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b),      (CVT_f16x2_f32 $a, $b, CvtRZ)>;
 def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b), (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_ff2f16x2_rn_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRN)>;
+  def : Pat<(int_nvvm_ff2f16x2_rn_relu_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRN_RELU)>;
+  def : Pat<(int_nvvm_ff2f16x2_rz_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRZ)>;
+  def : Pat<(int_nvvm_ff2f16x2_rz_relu_satfinite f32:$a, f32:$b), (CVT_f16x2_f32_sf $a, $b, CvtRZ_RELU)>;
+}
 
 let Predicates = [hasPTX<87>, hasSM100aOrSM103a] in {
 def : Pat<(int_nvvm_ff2f16x2_rs f32:$a, f32:$b, i32:$c),
@@ -1967,6 +1978,23 @@ def : Pat<(int_nvvm_f2bf16_rn f32:$a),      (CVT_bf16_f32 $a, CvtRN)>;
 def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a), (CVT_bf16_f32 $a, CvtRN_RELU)>;
 def : Pat<(int_nvvm_f2bf16_rz f32:$a),      (CVT_bf16_f32 $a, CvtRZ)>;
 def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a), (CVT_bf16_f32 $a, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_f2bf16_rz_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRZ)>;
+  def : Pat<(int_nvvm_f2bf16_rz_relu_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRZ_RELU)>;
+  def : Pat<(int_nvvm_f2bf16_rn_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRN)>;
+  def : Pat<(int_nvvm_f2bf16_rn_relu_satfinite f32:$a), (CVT_bf16_f32_sf $a, CvtRN_RELU)>;
+}
+
+def : Pat<(int_nvvm_f2f16_rn f32:$a),      (CVT_f16_f32 $a, CvtRN)>;
+def : Pat<(int_nvvm_f2f16_rn_relu f32:$a), (CVT_f16_f32 $a, CvtRN_RELU)>;
+def : Pat<(int_nvvm_f2f16_rz f32:$a),      (CVT_f16_f32 $a, CvtRZ)>;
+def : Pat<(int_nvvm_f2f16_rz_relu f32:$a), (CVT_f16_f32 $a, CvtRZ_RELU)>;
+let Predicates = [hasPTX<81>, hasSM<80>] in {
+  def : Pat<(int_nvvm_f2f16_rz_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRZ)>;
+  def : Pat<(int_nvvm_f2f16_rz_relu_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRZ_RELU)>;
+  def : Pat<(int_nvvm_f2f16_rn_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRN)>;
+  def : Pat<(int_nvvm_f2f16_rn_relu_satfinite f32:$a), (CVT_f16_f32_sf $a, CvtRN_RELU)>;
+}
 
 def : Pat<(int_nvvm_lohi_i2d i32:$a, i32:$b), (V2I32toI64 $a, $b)>;
 def : Pat<(int_nvvm_d2i_lo f64:$a), (I64toI32L $a)>;
diff --git a/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll
new file mode 100644
index 0000000000000..20d1acabe8f53
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/convert-sm80-sf.ll
@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | FileCheck %s
+; RUN: %if ptxas-sm_80 && ptxas-isa-8.1 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -mattr=+ptx81 | %ptxas-verify -arch=sm_80 %}
+
+define <2 x bfloat> @cvt_rn_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rn_relu_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_relu_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_relu_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_relu_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.relu.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rz_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rz_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rz.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+define <2 x bfloat> @cvt_rz_relu_bf16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_relu_bf16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_relu_bf16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rz_relu_bf16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rz.relu.satfinite.bf16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float %f1, float %f2)
+  ret <2 x bfloat> %val
+}
+
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rn.relu.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.satfinite(float, float)
+declare <2 x bfloat> @llvm.nvvm.ff2bf16x2.rz.relu.satfinite(float, float)
+
+define <2 x half> @cvt_rn_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_f16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_f16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.satfinite(float %f1, float %f2)
+  ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rn_relu_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rn_relu_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rn_relu_f16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rn_relu_f16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rn.relu.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rn.relu.satfinite(float %f1, float %f2)
+  ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rz_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_f16x2_f32_sf_param_0];
+; CHECK-NEXT:    ld.param.b32 %r2, [cvt_rz_f16x2_f32_sf_param_1];
+; CHECK-NEXT:    cvt.rz.satfinite.f16x2.f32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %val = call <2 x half> @llvm.nvvm.ff2f16x2.rz.satfinite(float %f1, float %f2)
+  ret <2 x half> %val
+}
+
+define <2 x half> @cvt_rz_relu_f16x2_f32_sf(float %f1, float %f2) {
+; CHECK-LABEL: cvt_rz_relu_f16x2_f32_sf(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [cvt_rz_relu_f16x2_f32_sf_param_0...
[truncated]

This change adds intrinsics and clang builtins for the remaining float
to fp16 conversions. This includes the following conversions:

- float to bf16x2 - satfinite variants
- float to f16x2 - satfinite variants
- float to bf16 - satfinite variants
- float to f16 - all variants

Tests are added in `convert-sm80.ll` and `convert-sm80-sf.ll` for the
intrinsics and in `builtins-nvptx.c` for the clang builtins.
@Wolfram70 Wolfram70 force-pushed the dev/Wolfram70/nvptx-f32-fp16-missing branch from 1854d31 to e0f41d4 Compare November 18, 2025 05:06
@github-actions
Copy link

🐧 Linux x64 Test Results

  • 192697 tests passed
  • 6165 tests skipped

@Wolfram70 Wolfram70 merged commit ec90912 into llvm:main Nov 19, 2025
8 of 10 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 19, 2025

LLVM Buildbot has detected a new failure on builder arc-builder running on arc-worker while building clang,llvm at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/3/builds/25058

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'LLVM :: CodeGen/X86/sse2-intrinsics-fast-isel.ll' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
/buildbot/worker/arc-folder/build/bin/llc < /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE
# executed command: /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
# .---command stderr------------
# | LLVM ERROR: Cannot select: intrinsic %llvm.x86.sse2.clflush
# | PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug.
# | Stack dump:
# | 0.	Program arguments: /buildbot/worker/arc-folder/build/bin/llc -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2
# | 1.	Running pass 'Function Pass Manager' on module '<stdin>'.
# | 2.	Running pass 'X86 DAG->DAG Instruction Selection' on function '@test_mm_clflush'
# |  #0 0x0000000002394e78 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/buildbot/worker/arc-folder/build/bin/llc+0x2394e78)
# |  #1 0x0000000002391d85 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
# |  #2 0x00007f6103912630 __restore_rt sigaction.c:0:0
# |  #3 0x00007f61026623d7 raise (/usr/lib64/libc.so.6+0x363d7)
# |  #4 0x00007f6102663ac8 abort (/usr/lib64/libc.so.6+0x37ac8)
# |  #5 0x000000000072c3c5 llvm::json::operator==(llvm::json::Value const&, llvm::json::Value const&) (.cold) JSON.cpp:0:0
# |  #6 0x0000000002114289 llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/buildbot/worker/arc-folder/build/bin/llc+0x2114289)
# |  #7 0x0000000002118eb9 llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/buildbot/worker/arc-folder/build/bin/llc+0x2118eb9)
# |  #8 0x000000000096e567 (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*) X86ISelDAGToDAG.cpp:0:0
# |  #9 0x000000000210fa6f llvm::SelectionDAGISel::DoInstructionSelection() (/buildbot/worker/arc-folder/build/bin/llc+0x210fa6f)
# | #10 0x0000000002120198 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/buildbot/worker/arc-folder/build/bin/llc+0x2120198)
# | #11 0x0000000002124483 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/buildbot/worker/arc-folder/build/bin/llc+0x2124483)
# | #12 0x0000000002124f75 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x2124f75)
# | #13 0x000000000210f27f llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) (/buildbot/worker/arc-folder/build/bin/llc+0x210f27f)
# | #14 0x00000000012231d7 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (.part.0) MachineFunctionPass.cpp:0:0
# | #15 0x000000000189a53b llvm::FPPassManager::runOnFunction(llvm::Function&) (/buildbot/worker/arc-folder/build/bin/llc+0x189a53b)
# | #16 0x000000000189a8e1 llvm::FPPassManager::runOnModule(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x189a8e1)
# | #17 0x000000000189b4f5 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/buildbot/worker/arc-folder/build/bin/llc+0x189b4f5)
# | #18 0x0000000000810740 compileModule(char**, llvm::LLVMContext&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>&) llc.cpp:0:0
# | #19 0x0000000000734949 main (/buildbot/worker/arc-folder/build/bin/llc+0x734949)
# | #20 0x00007f610264e555 __libc_start_main (/usr/lib64/libc.so.6+0x22555)
# | #21 0x0000000000805816 _start (/buildbot/worker/arc-folder/build/bin/llc+0x805816)
# `-----------------------------
# error: command failed with exit status: -6
# executed command: /buildbot/worker/arc-folder/build/bin/FileCheck /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll --check-prefixes=CHECK,X86,SSE,X86-SSE
# .---command stderr------------
# | /buildbot/worker/arc-folder/llvm-project/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll:399:14: error: SSE-LABEL: expected string not found in input
# | ; SSE-LABEL: test_mm_bsrli_si128:
# |              ^
# | <stdin>:170:21: note: scanning from here
# | test_mm_bslli_si128: # @test_mm_bslli_si128
# |                     ^
# | <stdin>:178:9: note: possible intended match here
# |  .globl test_mm_bsrli_si128 # 
# |         ^
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 19, 2025

LLVM Buildbot has detected a new failure on builder ppc64le-mlir-rhel-clang running on ppc64le-mlir-rhel-test while building clang,llvm at step 3 "clean-build-dir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/129/builds/33473

Here is the relevant piece of the build log for the reference
Step 3 (clean-build-dir) failure: Delete failed. (failure) (timed out)
Step 4 (cmake-configure) failure: cmake (failure) (timed out)
-- The C compiler identification is Clang 19.1.7
command timed out: 1200 seconds without output running [b'cmake', b'-DLLVM_TARGETS_TO_BUILD=PowerPC', b'-DLLVM_INSTALL_UTILS=ON', b'-DCMAKE_CXX_STANDARD=17', b'-DLLVM_ENABLE_PROJECTS=mlir', b'-DLLVM_LIT_ARGS=-vj 256', b'-DCMAKE_C_COMPILER_LAUNCHER=ccache', b'-DCMAKE_CXX_COMPILER_LAUNCHER=ccache', b'-DCMAKE_BUILD_TYPE=Release', b'-DLLVM_ENABLE_ASSERTIONS=ON', b'-GNinja', b'../llvm-project/llvm'], attempting to kill
process killed by signal 9
program finished with exit code -1
elapsedTime=1738.900575

@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 19, 2025

LLVM Buildbot has detected a new failure on builder llvm-nvptx64-nvidia-win running on as-builder-8 while building clang,llvm at step 7 "test-build-unified-tree-check-llvm".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/155/builds/14977

Here is the relevant piece of the build log for the reference
Step 7 (test-build-unified-tree-check-llvm) failure: test (failure)
******************** TEST 'LLVM-Unit :: Support/./SupportTests.exe/79/105' FAILED ********************
Script(shard):
--
GTEST_OUTPUT=json:C:\buildbot\as-builder-8\llvm-nvptx64-nvidia-win\build\unittests\Support\.\SupportTests.exe-LLVM-Unit-27980-79-105.json GTEST_SHUFFLE=0 GTEST_TOTAL_SHARDS=105 GTEST_SHARD_INDEX=79 C:\buildbot\as-builder-8\llvm-nvptx64-nvidia-win\build\unittests\Support\.\SupportTests.exe
--


Note: This is test shard 80 of 105.

[==========] Running 16 tests from 16 test suites.

[----------] Global test environment set-up.

[----------] 1 test from BinaryStreamTest

[ RUN      ] BinaryStreamTest.DropOperations

[       OK ] BinaryStreamTest.DropOperations (0 ms)

[----------] 1 test from BinaryStreamTest (0 ms total)



[----------] 1 test from CommandLineTest

[ RUN      ] CommandLineTest.TokenizeAndMarkEOLs

[       OK ] CommandLineTest.TokenizeAndMarkEOLs (0 ms)

[----------] 1 test from CommandLineTest (0 ms total)



[----------] 1 test from DataExtractorTest

[ RUN      ] DataExtractorTest.LEB128_error

[       OK ] DataExtractorTest.LEB128_error (0 ms)

[----------] 1 test from DataExtractorTest (0 ms total)



[----------] 1 test from Error

[ RUN      ] Error.ForwardToExpected

[       OK ] Error.ForwardToExpected (0 ms)

[----------] 1 test from Error (0 ms total)
...

@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 19, 2025

LLVM Buildbot has detected a new failure on builder clang-aarch64-sve-vls-2stage running on linaro-g3-03 while building clang,llvm at step 12 "ninja check 2".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/4/builds/10591

Here is the relevant piece of the build log for the reference
Step 12 (ninja check 2) failure: stage 2 checked (failure)
******************** TEST 'HWAddressSanitizer-aarch64 :: TestCases/hwasan_symbolize_stack_overflow.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
rm -rf /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp; mkdir /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp # RUN: at line 1
+ rm -rf /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp
+ mkdir /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp
/home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/./bin/clang    -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta   -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -mllvm -hwasan-globals -mllvm -hwasan-use-short-granules -mllvm -hwasan-instrument-landing-pads=0 -mllvm -hwasan-instrument-personality-functions -Wl,--build-id -g /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp -o /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow # RUN: at line 2
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/./bin/clang -Wthread-safety -Wthread-safety-reference -Wthread-safety-beta -gline-tables-only -fsanitize=hwaddress -fuse-ld=lld -mllvm -hwasan-globals -mllvm -hwasan-use-short-granules -mllvm -hwasan-instrument-landing-pads=0 -mllvm -hwasan-instrument-personality-functions -Wl,--build-id -g /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp -o /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow
env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not  /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow 16 2>&1 | /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index | FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,AFTER0 # RUN: at line 3
+ env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow 16
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index
+ FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,AFTER0
Could not find symbols for lib/aarch64-linux-gnu/libc.so.6 (Build ID: 034c9554b03099fc969ae2528ea6fd26286057d8)
env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not  /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow 17 2>&1 | /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index | FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,AFTER1 # RUN: at line 4
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index
+ FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,AFTER1
+ env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow 17
Could not find symbols for lib/aarch64-linux-gnu/libc.so.6 (Build ID: 034c9554b03099fc969ae2528ea6fd26286057d8)
env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not  /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow -1 2>&1 | /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index | FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,BEFORE1 # RUN: at line 5
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index
+ env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow -1
+ FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,BEFORE1
Could not find symbols for lib/aarch64-linux-gnu/libc.so.6 (Build ID: 034c9554b03099fc969ae2528ea6fd26286057d8)
env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not  /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow -17 2>&1 | /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index | FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,BEFORE17 # RUN: at line 6
+ env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow -17
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index
+ FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,BEFORE17
Could not find symbols for lib/aarch64-linux-gnu/libc.so.6 (Build ID: 034c9554b03099fc969ae2528ea6fd26286057d8)
env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not  /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow 1016 2>&1 | /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index | FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,AFTER1000 # RUN: at line 7
+ FileCheck /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp --check-prefixes=CHECK,AFTER1000
+ env HWASAN_OPTIONS=disable_allocator_tagging=1:random_tags=0:fail_without_syscall_abi=0:symbolize=0 not /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp/hwasan_overflow 1016
+ /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/bin/hwasan_symbolize --symbols /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/stage2/runtimes/runtimes-bins/compiler-rt/test/hwasan/AARCH64/TestCases/Output/hwasan_symbolize_stack_overflow.cpp.tmp --index
Could not find symbols for lib/aarch64-linux-gnu/libc.so.6 (Build ID: 034c9554b03099fc969ae2528ea6fd26286057d8)
/home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp:21:12: error: CHECK: expected string not found in input
 // CHECK: Potentially referenced stack object:
           ^
<stdin>:1:1: note: scanning from here
==3935722==ERROR: HWAddressSanitizer: tag-mismatch on address 0xffffcba001a8 at pc 0xb0c4a3afcbf4
^
<stdin>:10:13: note: possible intended match here
Address 0xffffcba001a8 is located in stack of thread T0
            ^

Input file: <stdin>
Check file: /home/tcwg-buildbot/worker/clang-aarch64-sve-vls-2stage/llvm/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_overflow.cpp

-dump-input=help explains the following input dump.

...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:NVPTX clang:frontend Language frontend issues, e.g. anything involving "Sema" clang Clang issues not falling into any other category llvm:ir

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants