-
Notifications
You must be signed in to change notification settings - Fork 14.6k
Open
Labels
Description
test commit: f2e244f
Description:
I am experiencing inconsistent results when executing the same MLIR program with and without the -convert-math-to-spirv pass.
Steps to Reproduce:
Minimal MLIR program (test.mlir):
module {
func.func private @printMemrefF32(tensor<*xf32>)
func.func @main() {
%0 = "tosa.const"() <{values = dense<7.077000e+01> : tensor<2x3x4x2x3xf32>}> : () -> tensor<2x3x4x2x3xf32>
%1 = tosa.tanh %0 : (tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
%cast = tensor.cast %1 : tensor<2x3x4x2x3xf32> to tensor<*xf32>
call @printMemrefF32(%cast) : (tensor<*xf32>) -> ()
return
}
}
1. Without -convert-math-to-spirv:
Command:
/home/workdir/llvm-project/build/bin/mlir-opt test.mlir --pass-pipeline="builtin.module(func.func(tosa-to-linalg))"| \
/home/workdir/llvm-project/build/bin/mlir-opt -convert-spirv-to-llvm -sparsifier="vl=8" | \
/home/workdir/llvm-project/build/bin/mlir-runner -e main -entry-point-result=void -shared-libs=/home/workdir/llvm-project/build/lib/libmlir_runner_utils.so
Output:
[[[[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]]], [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]]], [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]]]], [[[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]]], [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]]], [[[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]], [[1, 1, 1], [1, 1, 1]]]]]
2. With -convert-math-to-spirv:
/home/workdir/llvm-project/build/bin/mlir-opt test.mlir --pass-pipeline="builtin.module(func.func(tosa-to-linalg))"| \
/home/workdir/llvm-project/build/bin/mlir-opt -convert-math-to-spirv -convert-spirv-to-llvm -sparsifier="vl=8" | \
/home/workdir/llvm-project/build/bin/mlir-runner -e main -entry-point-result=void -shared-libs=/home/workdir/llvm-project/build/lib/libmlir_runner_utils.so
Output:
[[[[[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]]], [[[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]]], [[[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]]]], [[[[ -nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]]], [[[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]]], [[[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]], [[-nan, -nan, -nan], [-nan, -nan, -nan]]]]]
Root Cause:
I tried to analyse the root cause by inspecting the IR produced by the following command, which applies -convert-math-to-spirv and stops after -convert-spirv-to-llvm (before the sparsifier and runner):
/home/workdir/llvm-project/build/bin/mlir-opt test.mlir --pass-pipeline="builtin.module(func.func(tosa-to-linalg))"| \
/home/workdir/llvm-project/build/bin/mlir-opt -convert-math-to-spirv -convert-spirv-to-llvm
Output IR:
#map = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
module {
func.func private @printMemrefF32(tensor<*xf32>)
func.func @main() {
%0 = "tosa.const"() <{values = dense<7.077000e+01> : tensor<2x3x4x2x3xf32>}> : () -> tensor<2x3x4x2x3xf32>
%1 = tensor.empty() : tensor<2x3x4x2x3xf32>
%2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%0 : tensor<2x3x4x2x3xf32>) outs(%1 : tensor<2x3x4x2x3xf32>) {
^bb0(%in: f32, %out: f32):
%3 = llvm.mlir.constant(2.000000e+00 : f32) : f32
%4 = llvm.fmul %3, %in : f32
%5 = llvm.intr.exp(%4) : (f32) -> f32
%6 = llvm.mlir.constant(1.000000e+00 : f32) : f32
%7 = llvm.fsub %5, %6 : f32
%8 = llvm.fadd %5, %6 : f32
%9 = llvm.fdiv %7, %8 : f32
linalg.yield %9 : f32
} -> tensor<2x3x4x2x3xf32>
%cast = tensor.cast %2 : tensor<2x3x4x2x3xf32> to tensor<*xf32>
call @printMemrefF32(%cast) : (tensor<*xf32>) -> ()
return
}
}
It looks like there's a bug here.
The issue lies in %5 = llvm.intr.exp(%4) : (f32) -> f32. The input tensor is filled with a constant value of 70.77, so computing e^(2x) = e^141.54 overflows to +Inf in float32 precision. The subsequent tanh formula (e^(2x) - 1) / (e^(2x) + 1) then evaluates to Inf/Inf, so every element in the output tensor ends up being NaN (Not a Number).