From 7e48ec701ea857f630c8bed58868b5646be4220c Mon Sep 17 00:00:00 2001 From: William Moses Date: Tue, 18 Mar 2025 01:18:43 +0100 Subject: [PATCH 1/4] [MLIR][GPUToNVVM] Support 32-bit isfinite --- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 159a6afd4a917..9e00bb34f4cb4 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -552,8 +552,8 @@ void mlir::populateLibDeviceConversionPatterns( "__nv_floor"); populateOpPatterns(converter, patterns, benefit, "__nv_fmaf", "__nv_fma"); - // Note: libdevice does not provide `__nv_isfinitef` as of moment of writing. - populateOpPatterns(converter, patterns, benefit, "", + // Note: libdevice uses a different name for 32-bit finite checking + populateOpPatterns(converter, patterns, benefit, "__nv_finitef", "__nv_isfinited"); populateOpPatterns(converter, patterns, benefit, "__nv_isinff", "__nv_isinfd"); From 287e687d4b6e603c8485bcbb44b5c2626dd141ba Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Tue, 18 Mar 2025 09:43:57 +0900 Subject: [PATCH 2/4] Add test --- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 14594cd6badb1..ce8da38f24179 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1083,9 +1083,9 @@ gpu.module @test_module_54 { // CHECK: llvm.mlir.constant(0 // CHECK: llvm.icmp "ne" %3 = math.isnan %f64 : f64 - // Note: for some reason, libdevice does not provide isfinite for f32, so - // this should fail to convert. - // CHECK: math.isfinite {{.*}} : f32 + // CHECK: llvm.call @__nv_finitef(%{{.*}}) : (f32) -> i32 + // CHECK: llvm.mlir.constant(0 + // CHECK: llvm.icmp "ne" %4 = math.isfinite %f32 : f32 // CHECK: llvm.call @__nv_isfinited(%{{.*}}) : (f64) -> i32 // CHECK: llvm.mlir.constant(0 From b824afd9e8f52c83f8c0ab2e78e38f7629a4d291 Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Tue, 18 Mar 2025 09:44:08 +0900 Subject: [PATCH 3/4] clang-format --- mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 9e00bb34f4cb4..958d0d085fce1 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -553,8 +553,8 @@ void mlir::populateLibDeviceConversionPatterns( populateOpPatterns(converter, patterns, benefit, "__nv_fmaf", "__nv_fma"); // Note: libdevice uses a different name for 32-bit finite checking - populateOpPatterns(converter, patterns, benefit, "__nv_finitef", - "__nv_isfinited"); + populateOpPatterns(converter, patterns, benefit, + "__nv_finitef", "__nv_isfinited"); populateOpPatterns(converter, patterns, benefit, "__nv_isinff", "__nv_isinfd"); populateOpPatterns(converter, patterns, benefit, "__nv_isnanf", From ee95fa6b2336e947bcd10075c52333251d7d62ce Mon Sep 17 00:00:00 2001 From: Ivan Radanov Ivanov Date: Tue, 18 Mar 2025 09:49:14 +0900 Subject: [PATCH 4/4] Also check function signature --- mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index ce8da38f24179..6d4555e815b66 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1064,6 +1064,7 @@ gpu.module @test_module_54 { // CHECK: llvm.func @__nv_isinfd(f64) -> i32 // CHECK: llvm.func @__nv_isnanf(f32) -> i32 // CHECK: llvm.func @__nv_isnand(f64) -> i32 + // CHECK: llvm.func @__nv_finitef(f32) -> i32 // CHECK: llvm.func @__nv_isfinited(f64) -> i32 // CHECK-LABEL: @fpclassify func.func @fpclassify(%f32: f32, %f64: f64) -> (i1, i1, i1, i1, i1, i1) {