diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 159a6afd4a917..958d0d085fce1 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -552,9 +552,9 @@ void mlir::populateLibDeviceConversionPatterns( "__nv_floor"); populateOpPatterns(converter, patterns, benefit, "__nv_fmaf", "__nv_fma"); - // Note: libdevice does not provide `__nv_isfinitef` as of moment of writing. - populateOpPatterns(converter, patterns, benefit, "", - "__nv_isfinited"); + // Note: libdevice uses a different name for 32-bit finite checking + populateOpPatterns(converter, patterns, benefit, + "__nv_finitef", "__nv_isfinited"); populateOpPatterns(converter, patterns, benefit, "__nv_isinff", "__nv_isinfd"); populateOpPatterns(converter, patterns, benefit, "__nv_isnanf", diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 14594cd6badb1..6d4555e815b66 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1064,6 +1064,7 @@ gpu.module @test_module_54 { // CHECK: llvm.func @__nv_isinfd(f64) -> i32 // CHECK: llvm.func @__nv_isnanf(f32) -> i32 // CHECK: llvm.func @__nv_isnand(f64) -> i32 + // CHECK: llvm.func @__nv_finitef(f32) -> i32 // CHECK: llvm.func @__nv_isfinited(f64) -> i32 // CHECK-LABEL: @fpclassify func.func @fpclassify(%f32: f32, %f64: f64) -> (i1, i1, i1, i1, i1, i1) { @@ -1083,9 +1084,9 @@ gpu.module @test_module_54 { // CHECK: llvm.mlir.constant(0 // CHECK: llvm.icmp "ne" %3 = math.isnan %f64 : f64 - // Note: for some reason, libdevice does not provide isfinite for f32, so - // this should fail to convert. - // CHECK: math.isfinite {{.*}} : f32 + // CHECK: llvm.call @__nv_finitef(%{{.*}}) : (f32) -> i32 + // CHECK: llvm.mlir.constant(0 + // CHECK: llvm.icmp "ne" %4 = math.isfinite %f32 : f32 // CHECK: llvm.call @__nv_isfinited(%{{.*}}) : (f64) -> i32 // CHECK: llvm.mlir.constant(0