[mlir][sparse][taco] Add support for float32.
Previously, we only supported float64. We now support both float32 and
float64. When constructing a tensor without providing a data type, the
default is float32.

Fix the tests for data type consistency. All PyTACO application tests now
use float32 to match the default data type of TACO. Other tests may use
float32 or float64.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D120356
bixia1 committed Feb 24, 2022
1 parent 47d18be commit c8ae8cf
Showing 8 changed files with 112 additions and 60 deletions.
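Before the diffs, a small sketch of the behavior described in the commit message (editorial illustration, not part of the commit). It assumes the PyTACO bindings are imported through the tools package the way the tests below import them:

import numpy as np
from tools import mlir_pytaco_api as pt

# No dtype argument: the tensor now defaults to float32 instead of float64.
A = pt.tensor([8, 8], pt.format([pt.compressed, pt.compressed]))

# from_array accepts float32 as well as float64 numpy arrays.
B = pt.from_array(np.full((8, 8), 3.0, dtype=np.float32))
C = pt.from_array(np.full((8, 8), 2.0, dtype=np.float64))

# An explicit dtype can still be requested where float64 is wanted.
D = pt.tensor([2, 3], dtype=pt.float64, is_dense=True)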
@@ -30,8 +30,8 @@

# These two lines have been modified from the original program to use static
# data to support result comparison.
-C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float64))
-D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float64))
+C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float32))
+D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float32))

# Declare the result to be a dense matrix.
A = pt.tensor([B.shape[0], 25], rm)
4 changes: 2 additions & 2 deletions mlir/test/Integration/Dialect/SparseTensor/taco/test_SDDMM.py
@@ -15,8 +15,8 @@
i, j, k = pt.get_index_vars(3)

# Set up dense matrices.
-A = pt.from_array(np.full((8, 8), 2.0))
-B = pt.from_array(np.full((8, 8), 3.0))
+A = pt.from_array(np.full((8, 8), 2.0, dtype=np.float32))
+B = pt.from_array(np.full((8, 8), 3.0, dtype=np.float32))

# Set up sparse matrices.
S = pt.tensor([8, 8], pt.format([pt.compressed, pt.compressed]))
4 changes: 2 additions & 2 deletions mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
@@ -31,8 +31,8 @@

# These two lines have been modified from the original program to use static
# data to support result comparison.
-x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float64))
-z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float64))
+x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float32))
+z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float32))

# Declare the result to be a dense vector
y = pt.tensor([A.shape[0]], dv)
@@ -14,7 +14,7 @@
B = pt.from_array(np.full([2,3], 2, dtype=np.float64))
# Define the result tensor as a true dense tensor. The parameter is_dense=True
# is an MLIR-PyTACO extension.
-C = pt.tensor([2, 3], is_dense=True)
+C = pt.tensor([2, 3], dtype=pt.float64, is_dense=True)

C[i, j] = A[i, j] + B[i, j]

@@ -96,7 +96,7 @@ class DType:
    kind: A Type enum representing the data type.
    value: The numpy data type for the TACO data type.
  """
-  kind: Type = Type.FLOAT64
+  kind: Type = Type.FLOAT32

  def is_float(self) -> bool:
    """Returns whether the data type represents a floating point value."""
@@ -112,6 +112,30 @@ def value(self) -> _AnyRuntimeType:
    return self.kind.value


+def _dtype_to_mlir_str(dtype: DType) -> str:
+  """Returns the MLIR string for the given dtype."""
+  dtype_to_str = {
+      Type.INT16: "i16",
+      Type.INT32: "i32",
+      Type.INT64: "i64",
+      Type.FLOAT32: "f32",
+      Type.FLOAT64: "f64"
+  }
+  return dtype_to_str[dtype.kind]


+def _nptype_to_taco_type(ty: np.dtype) -> DType:
+  """Returns the TACO type for the given numpy type."""
+  nptype_to_dtype = {
+      np.int16: Type.INT16,
+      np.int32: Type.INT32,
+      np.int64: Type.INT64,
+      np.float32: Type.FLOAT32,
+      np.float64: Type.FLOAT64
+  }
+  return DType(nptype_to_dtype[ty])


def _mlir_type_from_taco_type(dtype: DType) -> ir.Type:
  """Returns the MLIR type corresponding to the given TACO type."""
  dtype_to_irtype = {
@@ -123,7 +147,6 @@ def _mlir_type_from_taco_type(dtype: DType) -> ir.Type:
  }
  return dtype_to_irtype[dtype.kind]


def _ctype_pointer_from_array(array: np.ndarray) -> ctypes.pointer:
  """Returns the ctype pointer for the given numpy array."""
  return ctypes.pointer(
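The two new helpers, _nptype_to_taco_type and _dtype_to_mlir_str, give a round trip from numpy scalar types to TACO DType objects and on to MLIR type strings. A small illustration (not part of the diff; the tools.mlir_pytaco import path follows the test layout and is an assumption):

import numpy as np
from tools import mlir_pytaco

# numpy scalar type -> TACO DType, as from_array does via array.dtype.type.
dt = mlir_pytaco._nptype_to_taco_type(np.float32)
assert dt.kind == mlir_pytaco.Type.FLOAT32

# TACO DType -> MLIR type string, as passed to the sparse tensor utilities.
assert mlir_pytaco._dtype_to_mlir_str(dt) == "f32"
assert mlir_pytaco._dtype_to_mlir_str(
    mlir_pytaco.DType(mlir_pytaco.Type.FLOAT64)) == "f64"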
@@ -632,7 +655,7 @@ def __init__(self,
"""
# Take care of the argument default values common to both sparse tensors
# and dense tensors.
dtype = dtype or DType(Type.FLOAT64)
dtype = dtype or DType(Type.FLOAT32)
self._name = name or self._get_unique_name()
self._assignment = None
self._sparse_value_location = _SparseValueInfo._UNPACKED
@@ -688,7 +711,7 @@ def unpack(self) -> None:
    # Use the output MLIR sparse tensor pointer to retrieve the COO-flavored
    # values and verify the values.
    rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
-        self._packed_sparse_value, np.float64)
+        self._packed_sparse_value, self._dtype.value)
    assert rank == self.order
    assert np.allclose(self.shape, shape)
    assert nse == len(values)
@@ -757,7 +780,7 @@ def to_array(self) -> np.ndarray:
  def from_array(array: np.ndarray) -> "Tensor":
    """Returns a dense tensor with the value copied from the input array.

-    We currently only support the conversion of float64 numpy arrays to Tensor.
+    We currently only support the conversion of float32 and float64 numpy arrays
+    to Tensor.

    Args:
      array: The numpy array that provides the data type, shape and value for
@@ -767,11 +791,14 @@ def from_array(array: np.ndarray) -> "Tensor":
      A Tensor object.

    Raises:
-      ValueError if the data type of the numpy array is not float64.
+      ValueError if the data type of the numpy array is not supported.
    """
-    if array.dtype != np.float64:
-      raise ValueError(f"Expected float64 value type: {array.dtype}.")
-    tensor = Tensor(array.shape, is_dense=True)
+    if array.dtype != np.float32 and array.dtype != np.float64:
+      raise ValueError(f"Expected floating point value type: {array.dtype}.")
+    tensor = Tensor(
+        array.shape,
+        dtype=_nptype_to_taco_type(array.dtype.type),
+        is_dense=True)
    tensor._dense_storage = np.copy(array)
    return tensor

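For illustration (not part of the commit), the new from_array check accepts either floating point dtype and still rejects everything else; the tools.mlir_pytaco_api import path mirrors the tests and is an assumption:

import numpy as np
from tools import mlir_pytaco_api as pt

pt.from_array(np.ones((2, 2), dtype=np.float32))  # now accepted
pt.from_array(np.ones((2, 2), dtype=np.float64))  # still accepted
try:
  pt.from_array(np.ones((2, 2), dtype=np.int64))  # integer arrays are rejected
except ValueError as e:
  print(e)  # Expected floating point value type: int64.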
@@ -808,7 +835,7 @@ def from_coo(
    # The size of each dimension is one more than such a maximum coordinate
    # value.
    shape = [c + 1 for c in max_coordinate]
-    tensor = Tensor(shape, fmt)
+    tensor = Tensor(shape, fmt, dtype=dtype)
    tensor._coords = coordinates
    tensor._values = values

@@ -833,8 +860,9 @@ def from_file(
      value is stored as an MLIR sparse tensor.
    """
    sparse_tensor, shape = utils.create_sparse_tensor(filename,
-                                                      fmt.format_pack.formats)
-    tensor = Tensor(shape.tolist(), fmt)
+                                                      fmt.format_pack.formats,
+                                                      _dtype_to_mlir_str(dtype))
+    tensor = Tensor(shape.tolist(), fmt, dtype=dtype)
    tensor._set_packed_sparse_tensor(sparse_tensor)

    return tensor
@@ -862,7 +890,8 @@ def to_file(self, filename: str) -> None:
"supported.")

utils.output_sparse_tensor(self._packed_sparse_value, filename,
self._format.format_pack.formats)
self._format.format_pack.formats,
_dtype_to_mlir_str(self._dtype))

@property
def dtype(self) -> DType:
@@ -31,7 +31,8 @@
_TNS_FILENAME_SUFFIX = ".tns"


-def read(filename: str, fmt: Format) -> Tensor:
+def read(filename: str, fmt: Format,
+         dtype: DType = DType(Type.FLOAT32)) -> Tensor:
  """Inputs a tensor from a given file.

  The name suffix of the file specifies the format of the input tensor. We
@@ -40,6 +41,7 @@ def read(filename: str, fmt: Format) -> Tensor:
  Args:
    filename: A string input filename.
    fmt: The storage format of the tensor.
+    dtype: The data type, default to float32.

  Raises:
    ValueError: If filename doesn't end with .mtx or .tns, or fmt is not an
@@ -52,7 +54,7 @@
f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: "
f"{filename}.")

-  return Tensor.from_file(filename, fmt, DType(Type.FLOAT64))
+  return Tensor.from_file(filename, fmt, dtype)


def write(filename: str, tensor: Tensor) -> None:
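The updated read entry point forwards an optional dtype to Tensor.from_file, defaulting to float32. A usage sketch (not part of the commit; it assumes read is re-exported through the pt alias as in the PyTACO tests, and that a local "matrix.tns" file exists):

from tools import mlir_pytaco_api as pt

fmt = pt.format([pt.compressed, pt.compressed])
t_default = pt.read("matrix.tns", fmt)                    # stored as float32
t_double = pt.read("matrix.tns", fmt, dtype=pt.float64)   # stored as float64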