From efaa78cae08024a6d0d329234695e0e22c7458bc Mon Sep 17 00:00:00 2001
From: bixia1
Date: Wed, 7 Dec 2022 15:39:17 -0800
Subject: [PATCH] [mlir][sparse] Replace vector.print with printMemref for
 some tests.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D139489
---
 .../Dialect/SparseTensor/CPU/concatenate.mlir | 222 +++++++++++-------
 .../SparseTensor/CPU/dense_output.mlir        |  12 +-
 .../CPU/sparse_conversion_dyn.mlir            |  38 ++-
 .../SparseTensor/CPU/sparse_expand.mlir       |  25 +-
 .../SparseTensor/CPU/sparse_flatten.mlir      |  23 +-
 .../SparseTensor/CPU/sparse_matmul.mlir       | 119 +++++-----
 .../SparseTensor/CPU/sparse_matrix_ops.mlir   |  40 +++-
 .../SparseTensor/CPU/sparse_mttkrp.mlir       |  13 +-
 .../SparseTensor/CPU/sparse_rewrite_sort.mlir |  64 +++--
 9 files changed, 328 insertions(+), 228 deletions(-)
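The recurring rewrite applied across all nine tests, sketched here on the 9x4 helper
from concatenate.mlir below (shapes and element types vary per test): a fixed-size
read-and-print pair such as

    %v = vector.transfer_read %c[%c0, %c0], %du : tensor<9x4xf64>, vector<9x4xf64>
    vector.print %v : vector<9x4xf64>

becomes a cast to an unranked tensor plus a call into the runner-utils printers,
which each test now declares once at module scope (values arrays go through the
1-d memref variant instead of a vector read):

    func.func private @printMemrefF64(%ptr : tensor<*xf64>)
    func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }

    %u = tensor.cast %c : tensor<9x4xf64> to tensor<*xf64>
    call @printMemrefF64(%u) : (tensor<*xf64>) -> ()

This is also why every RUN/DEFINE line below adds
%mlir_lib_dir/libmlir_runner_utils%shlibext to -shared-libs next to the C runner
utils library.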
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir
index e883ea37c7bcf..441024f1aa04c 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/concatenate.mlir
@@ -2,13 +2,13 @@
 // DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
 //
 // Do the same run, but now with direct IR generation.
-// REDEFINE: %{option} = enable-runtime-library=false
+// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true"
 // RUN: %{command}
 
 #MAT_C_C = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
@@ -35,6 +35,9 @@
 }>
 
 module {
+  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
+  func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }
+
   //
   // Tests without permutation.
   //
@@ -180,125 +183,95 @@ module {
   }
 
   func.func @dump_mat_9x4(%A: tensor<9x4xf64, #MAT_C_C>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
     %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C> to tensor<9x4xf64>
-    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<9x4xf64>, vector<9x4xf64>
-    vector.print %v : vector<9x4xf64>
+    %cu = tensor.cast %c : tensor<9x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%cu) : (tensor<*xf64>) -> ()
 
     %n = sparse_tensor.number_of_entries %A : tensor<9x4xf64, #MAT_C_C>
     vector.print %n : index
 
     %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<18xf64>
-    vector.print %2 : vector<18xf64>
+    call @printMemref1dF64(%1) : (memref<?xf64>) -> ()
 
     return
   }
 
   func.func @dump_mat_perm_9x4(%A: tensor<9x4xf64, #MAT_C_C_P>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
     %c = sparse_tensor.convert %A : tensor<9x4xf64, #MAT_C_C_P> to tensor<9x4xf64>
-    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<9x4xf64>, vector<9x4xf64>
-    vector.print %v : vector<9x4xf64>
+    %cu = tensor.cast %c : tensor<9x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%cu) : (tensor<*xf64>) -> ()
 
     %n = sparse_tensor.number_of_entries %A : tensor<9x4xf64, #MAT_C_C_P>
     vector.print %n : index
 
     %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_C_C_P> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<18xf64>
-    vector.print %2 : vector<18xf64>
+    call @printMemref1dF64(%1) : (memref<?xf64>) -> ()
 
     return
   }
 
   func.func @dump_mat_dense_9x4(%A: tensor<9x4xf64>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
-    %v = vector.transfer_read %A[%c0, %c0], %du: tensor<9x4xf64>, vector<9x4xf64>
-    vector.print %v : vector<9x4xf64>
+    %u = tensor.cast %A : tensor<9x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%u) : (tensor<*xf64>) -> ()
 
     return
   }
 
   func.func @dump_mat_annotated_dense_9x4(%A: tensor<9x4xf64, #MAT_D_D>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
     %n = sparse_tensor.number_of_entries %A : tensor<9x4xf64, #MAT_D_D>
     vector.print %n : index
 
     %1 = sparse_tensor.values %A : tensor<9x4xf64, #MAT_D_D> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<36xf64>
-    vector.print %2 : vector<36xf64>
+    call @printMemref1dF64(%1) : (memref<?xf64>) -> ()
 
     return
   }
 
   func.func @dump_mat_4x9(%A: tensor<4x9xf64, #MAT_C_C>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
     %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C> to tensor<4x9xf64>
-    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<4x9xf64>, vector<4x9xf64>
-    vector.print %v : vector<4x9xf64>
+    %cu = tensor.cast %c : tensor<4x9xf64> to tensor<*xf64>
+    call @printMemrefF64(%cu) : (tensor<*xf64>) -> ()
 
     %n = sparse_tensor.number_of_entries %A : tensor<4x9xf64, #MAT_C_C>
     vector.print %n : index
 
     %1 = sparse_tensor.values %A : tensor<4x9xf64, #MAT_C_C> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<18xf64>
-    vector.print %2 : vector<18xf64>
+    call @printMemref1dF64(%1) : (memref<?xf64>) -> ()
 
     return
   }
 
   func.func @dump_mat_dyn(%A: tensor<?x?xf64, #MAT_C_C>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
     %c = sparse_tensor.convert %A : tensor<?x?xf64, #MAT_C_C> to tensor<?x?xf64>
-    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<?x?xf64>, vector<4x9xf64>
-    vector.print %v : vector<4x9xf64>
+    %cu = tensor.cast %c : tensor<?x?xf64> to tensor<*xf64>
+    call @printMemrefF64(%cu) : (tensor<*xf64>) -> ()
 
     %n = sparse_tensor.number_of_entries %A : tensor<?x?xf64, #MAT_C_C>
     vector.print %n : index
 
     %1 = sparse_tensor.values %A : tensor<?x?xf64, #MAT_C_C> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<18xf64>
-    vector.print %2 : vector<18xf64>
+    call @printMemref1dF64(%1) : (memref<?xf64>) -> ()
 
     return
   }
 
   func.func @dump_mat_perm_4x9(%A: tensor<4x9xf64, #MAT_C_C_P>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
     %c = sparse_tensor.convert %A : tensor<4x9xf64, #MAT_C_C_P> to tensor<4x9xf64>
-    %v = vector.transfer_read %c[%c0, %c0], %du: tensor<4x9xf64>, vector<4x9xf64>
-    vector.print %v : vector<4x9xf64>
+    %cu = tensor.cast %c : tensor<4x9xf64> to tensor<*xf64>
+    call @printMemrefF64(%cu) : (tensor<*xf64>) -> ()
 
     %n = sparse_tensor.number_of_entries %A : tensor<4x9xf64, #MAT_C_C_P>
     vector.print %n : index
 
     %1 = sparse_tensor.values %A : tensor<4x9xf64, #MAT_C_C_P> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<18xf64>
-    vector.print %2 : vector<18xf64>
+    call @printMemref1dF64(%1) : (memref<?xf64>) -> ()
 
     return
   }
 
   func.func @dump_mat_dense_4x9(%A: tensor<4x9xf64>) {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
-    %v = vector.transfer_read %A[%c0, %c0], %du: tensor<4x9xf64>, vector<4x9xf64>
-    vector.print %v : vector<4x9xf64>
+    %1 = tensor.cast %A : tensor<4x9xf64> to tensor<*xf64>
+    call @printMemrefF64(%1) : (tensor<*xf64>) -> ()
 
     return
   }
@@ -343,111 +316,202 @@ module {
     %sm43cd_dyn = sparse_tensor.convert %m43 : tensor<4x3xf64> to tensor<?x?xf64, #MAT_C_D>
     %sm44dc_dyn = sparse_tensor.convert %m44 : tensor<4x4xf64> to tensor<?x?xf64, #MAT_D_C>
 
-    // CHECK: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 3, 2, 1, 1, 1, 0.5, 1, 5, 2, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5 )
+    // CHECK: [1, 3, 2, 1, 1, 1, 0.5, 1, 5, 2, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5
     %0 = call @concat_sparse_sparse(%sm24cc, %sm34cd, %sm44dc)
       : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
     call @dump_mat_9x4(%0) : (tensor<9x4xf64, #MAT_C_C>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     %1 = call @concat_sparse_dense(%sm24cc, %sm34cd, %sm44dc)
       : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
     call @dump_mat_dense_9x4(%1) : (tensor<9x4xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 3, 2, 1, 1, 1, 0.5, 1, 5, 2, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5 )
+    // CHECK: [1, 3, 2, 1, 1, 1, 0.5, 1, 5, 2, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5
     %2 = call @concat_mix_sparse(%m24, %sm34cd, %sm44dc)
      : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
     call @dump_mat_9x4(%2) : (tensor<9x4xf64, #MAT_C_C>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     %3 = call @concat_mix_dense(%m24, %sm34cd, %sm44dc)
       : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
     call @dump_mat_dense_9x4(%3) : (tensor<9x4xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 1, 1, 1, 1, 2, 0.5, 5, 3.5, 5, 0.5, 3, 1, 2, 1.5, 2, 1, 1 )
+    // CHECK: [1, 1, 1, 1, 1, 2, 0.5, 5, 3.5, 5, 0.5, 3, 1, 2, 1.5, 2, 1, 1
     %4 = call @concat_sparse_sparse_perm(%sm24ccp, %sm34cd, %sm44dc)
       : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C_P>
     call @dump_mat_perm_9x4(%4) : (tensor<9x4xf64, #MAT_C_C_P>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     %5 = call @concat_sparse_dense_perm(%sm24ccp, %sm34cdp, %sm44dc)
       : (tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64>
     call @dump_mat_dense_9x4(%5) : (tensor<9x4xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 3, 2, 1, 1, 1, 0.5, 1, 5, 2, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5 )
+    // CHECK: [1, 3, 2, 1, 1, 1, 0.5, 1, 5, 2, 1.5, 1, 3.5, 1, 5, 2, 1, 0.5
     %6 = call @concat_mix_sparse_perm(%m24, %sm34cdp, %sm44dc)
       : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_C_C>
     call @dump_mat_9x4(%6) : (tensor<9x4xf64, #MAT_C_C>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 3, 0 ), ( 0, 2, 0, 0 ), ( 1, 0, 1, 1 ), ( 0, 0.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 0, 0, 1.5, 1 ), ( 0, 3.5, 0, 0 ), ( 1, 5, 2, 0 ), ( 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0],
+    // CHECK-NEXT: [1, 0, 1, 1],
+    // CHECK-NEXT: [0, 0.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 1.5, 1],
+    // CHECK-NEXT: [0, 3.5, 0, 0],
+    // CHECK-NEXT: [1, 5, 2, 0],
+    // CHECK-NEXT: [1, 0.5, 0, 0]]
     %7 = call @concat_mix_dense_perm(%m24, %sm34cd, %sm44dcp)
       : (tensor<2x4xf64>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<9x4xf64>
     call @dump_mat_dense_9x4(%7) : (tensor<9x4xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5 )
+    // CHECK: [1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5
     %8 = call @concat_sparse_sparse_dim1(%sm42cc, %sm43cd, %sm44dc)
       : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
     call @dump_mat_4x9(%8) : (tensor<4x9xf64, #MAT_C_C>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     %9 = call @concat_sparse_dense_dim1(%sm42cc, %sm43cd, %sm44dc)
      : (tensor<4x2xf64, #MAT_C_C>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
     call @dump_mat_dense_4x9(%9) : (tensor<4x9xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5 )
+    // CHECK: [1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5
     %10 = call @concat_mix_sparse_dim1(%m42, %sm43cd, %sm44dc)
       : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
     call @dump_mat_4x9(%10) : (tensor<4x9xf64, #MAT_C_C>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     %11 = call @concat_mix_dense_dim1(%m42, %sm43cd, %sm44dc)
       : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
     call @dump_mat_dense_4x9(%11) : (tensor<4x9xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 3.1, 2, 1, 1, 5, 2, 1, 0.5, 1, 1, 1, 3.5, 5, 0.5, 1.5, 2, 1 )
+    // CHECK: [1, 3.1, 2, 1, 1, 5, 2, 1, 0.5, 1, 1, 1, 3.5, 5, 0.5, 1.5, 2, 1
     %12 = call @concat_sparse_sparse_perm_dim1(%sm42ccp, %sm43cd, %sm44dc)
       : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C_P>
     call @dump_mat_perm_4x9(%12) : (tensor<4x9xf64, #MAT_C_C_P>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     %13 = call @concat_sparse_dense_perm_dim1(%sm42ccp, %sm43cdp, %sm44dc)
       : (tensor<4x2xf64, #MAT_C_C_P>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64>
     call @dump_mat_dense_4x9(%13) : (tensor<4x9xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5 )
+    // CHECK: [1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5
     %14 = call @concat_mix_sparse_perm_dim1(%m42, %sm43cdp, %sm44dc)
       : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C>) -> tensor<4x9xf64, #MAT_C_C>
     call @dump_mat_4x9(%14) : (tensor<4x9xf64, #MAT_C_C>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     %15 = call @concat_mix_dense_perm_dim1(%m42, %sm43cd, %sm44dcp)
      : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C_P>) -> tensor<4x9xf64>
     call @dump_mat_dense_4x9(%15) : (tensor<4x9xf64>) -> ()
 
-    // CHECK-NEXT: ( ( 1, 0, 1, 0, 1, 0, 0, 1.5, 1 ), ( 3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0 ), ( 0, 2, 0, 0, 1, 1, 5, 2, 0 ), ( 0, 0, 5, 2, 0, 1, 0.5, 0, 0 ) )
+    // CHECK: {{\[}}[1, 0, 1, 0, 1, 0, 0, 1.5, 1],
+    // CHECK-NEXT: [3.1, 0, 1, 0, 0.5, 0, 3.5, 0, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 1, 1, 5, 2, 0],
+    // CHECK-NEXT: [0, 0, 5, 2, 0, 1, 0.5, 0, 0]]
     // CHECK-NEXT: 18
-    // CHECK-NEXT: ( 1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5 )
+    // CHECK: [1, 1, 1, 1.5, 1, 3.1, 1, 0.5, 3.5, 2, 1, 1, 5, 2, 5, 2, 1, 0.5
     %16 = call @concat_mix_sparse_dyn(%m42, %sm43cd, %sm44dc)
       : (tensor<4x2xf64>, tensor<4x3xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<?x?xf64, #MAT_C_C>
     call @dump_mat_dyn(%16) : (tensor<?x?xf64, #MAT_C_C>) -> ()
 
     // CHECK-NEXT: 36
-    // CHECK-NEXT: ( 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 2, 0, 0.5, 5, 0, 3.5, 5, 0.5, 3, 0, 1, 0, 2, 1.5, 0, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 )
+    // CHECK: [1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 2, 0, 0.5, 5, 0, 3.5, 5, 0.5, 3, 0, 1, 0, 2, 1.5, 0, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0
     %17 = call @concat_sparse_annotated_dense(%sm24cc, %sm34cd, %sm44dc)
       : (tensor<2x4xf64, #MAT_C_C>, tensor<3x4xf64, #MAT_C_D>, tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64, #MAT_D_D>
     call @dump_mat_annotated_dense_9x4(%17) : (tensor<9x4xf64, #MAT_D_D>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
index dd4352da0b782..e5c385bbfefae 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
@@ -3,7 +3,7 @@
 // DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.mtx" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
@@ -66,6 +66,7 @@ module {
   }
 
   func.func private @getTensorFilename(index) -> (!Filename)
+  func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }
 
   //
   // Main driver that reads matrix from file and calls the kernel.
@@ -86,13 +87,14 @@ module {
     //
     // Print the linearized 5x5 result for verification.
+    // CHECK: 25
+    // CHECK: [2, 0, 0, 2.8, 0, 0, 4, 0, 0, 5, 0, 0, 6, 0, 0, 8.2, 0, 0, 8, 0, 0, 10.4, 0, 0, 10
     //
-    // CHECK: ( 2, 0, 0, 2.8, 0, 0, 4, 0, 0, 5, 0, 0, 6, 0, 0, 8.2, 0, 0, 8, 0, 0, 10.4, 0, 0, 10 )
-    //
+    %n = sparse_tensor.number_of_entries %0 : tensor<?x?xf64, #DenseMatrix>
+    vector.print %n : index
     %m = sparse_tensor.values %0 : tensor<?x?xf64, #DenseMatrix> to memref<?xf64>
-    %v = vector.load %m[%c0] : memref<?xf64>, vector<25xf64>
-    vector.print %v : vector<25xf64>
+    call @printMemref1dF64(%m) : (memref<?xf64>) -> ()
 
     // Release the resources.
     bufferization.dealloc_tensor %a : tensor<?x?xf64, #SparseMatrix>
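The old vector reads doubled as a cheap buffer-size check: they deliberately read
past the stored entries (eight elements from a seven-entry values array in the
next file) so a wrong buffer size surfaced as a missing pad value, which is what
the "reads more than required to verify size of buffer" comment below refers to.
printMemref1dF64 prints only what the memref holds, so the rewritten tests verify
the count separately through sparse_tensor.number_of_entries, as in the hunk above:

    %n = sparse_tensor.number_of_entries %0 : tensor<?x?xf64, #DenseMatrix>
    vector.print %n : index
    // CHECK: 25

vector.print remains in use for such scalar index values; only the bulk tensor and
values-array dumps move to the runner-utils printers.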
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir
index 2d339d033fdb9..0fa6df8a37ebb 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conversion_dyn.mlir
@@ -2,7 +2,7 @@
 // DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
@@ -27,15 +27,14 @@
 //
 module {
+  func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }
+
   //
   // Helper method to print values array. The transfer actually
   // reads more than required to verify size of buffer as well.
   //
   func.func @dump(%arg0: memref<?xf64>) {
-    %c = arith.constant 0 : index
-    %d = arith.constant 0.0 : f64
-    %0 = vector.transfer_read %arg0[%c], %d: memref<?xf64>, vector<8xf64>
-    vector.print %0 : vector<8xf64>
+    call @printMemref1dF64(%arg0) : (memref<?xf64>) -> ()
     return
   }
 
@@ -55,15 +54,32 @@ module {
     %5 = sparse_tensor.convert %3 : tensor<?x?xf64, #DCSR> to tensor<?x?xf64, #DCSC>
     %6 = sparse_tensor.convert %4 : tensor<?x?xf64, #DCSC> to tensor<?x?xf64, #DCSR>
 
+    //
+    // Check number_of_entries.
+    //
+    // CHECK-COUNT-6: 7
+    %n1 = sparse_tensor.number_of_entries %1 : tensor<?x?xf64, #DCSR>
+    %n2 = sparse_tensor.number_of_entries %2 : tensor<?x?xf64, #DCSC>
+    %n3 = sparse_tensor.number_of_entries %3 : tensor<?x?xf64, #DCSR>
+    %n4 = sparse_tensor.number_of_entries %4 : tensor<?x?xf64, #DCSC>
+    %n5 = sparse_tensor.number_of_entries %5 : tensor<?x?xf64, #DCSC>
+    %n6 = sparse_tensor.number_of_entries %6 : tensor<?x?xf64, #DCSR>
+    vector.print %n1 : index
+    vector.print %n2 : index
+    vector.print %n3 : index
+    vector.print %n4 : index
+    vector.print %n5 : index
+    vector.print %n6 : index
+
     //
     // All proper row-/column-wise?
     //
-    // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 0 )
-    // CHECK: ( 1, 4, 6, 2, 5, 3, 7, 0 )
-    // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 0 )
-    // CHECK: ( 1, 4, 6, 2, 5, 3, 7, 0 )
-    // CHECK: ( 1, 4, 6, 2, 5, 3, 7, 0 )
-    // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 0 )
+    // CHECK: [1, 2, 3, 4, 5, 6, 7
+    // CHECK: [1, 4, 6, 2, 5, 3, 7
+    // CHECK: [1, 2, 3, 4, 5, 6, 7
+    // CHECK: [1, 4, 6, 2, 5, 3, 7
+    // CHECK: [1, 4, 6, 2, 5, 3, 7
+    // CHECK: [1, 2, 3, 4, 5, 6, 7
     //
     %m1 = sparse_tensor.values %1 : tensor<?x?xf64, #DCSR> to memref<?xf64>
     %m2 = sparse_tensor.values %2 : tensor<?x?xf64, #DCSC> to memref<?xf64>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
index d8c165dc6a687..2af648f6d1d82 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
@@ -2,7 +2,7 @@
 // DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
@@ -17,6 +17,8 @@
 }>
 
 module {
+  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
+
   //
   // Column-wise storage forces the ijk loop to permute into jki
   // so that access pattern expansion (workspace) needs to be
@@ -63,19 +65,18 @@ module {
       : (tensor<8x2xf64, #CSC>,
         tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
 
-    //
-    // CHECK:      ( ( 32.53, 35.73, 38.93, 42.13 ),
-    // CHECK-SAME:   ( 34.56, 37.96, 41.36, 44.76 ),
-    // CHECK-SAME:   ( 36.59, 40.19, 43.79, 47.39 ),
-    // CHECK-SAME:   ( 38.62, 42.42, 46.22, 50.02 ),
-    // CHECK-SAME:   ( 40.65, 44.65, 48.65, 52.65 ),
-    // CHECK-SAME:   ( 42.68, 46.88, 51.08, 55.28 ),
-    // CHECK-SAME:   ( 44.71, 49.11, 53.51, 57.91 ),
-    // CHECK-SAME:   ( 46.74, 51.34, 55.94, 60.54 ) )
+    // CHECK: {{\[}}[32.53, 35.73, 38.93, 42.13],
+    // CHECK-NEXT: [34.56, 37.96, 41.36, 44.76],
+    // CHECK-NEXT: [36.59, 40.19, 43.79, 47.39],
+    // CHECK-NEXT: [38.62, 42.42, 46.22, 50.02],
+    // CHECK-NEXT: [40.65, 44.65, 48.65, 52.65],
+    // CHECK-NEXT: [42.68, 46.88, 51.08, 55.28],
+    // CHECK-NEXT: [44.71, 49.11, 53.51, 57.91],
+    // CHECK-NEXT: [46.74, 51.34, 55.94, 60.54]]
     //
     %xc = sparse_tensor.convert %x3 : tensor<8x4xf64, #CSC> to tensor<8x4xf64>
-    %xv = vector.transfer_read %xc[%c0, %c0], %d1 : tensor<8x4xf64>, vector<8x4xf64>
-    vector.print %xv : vector<8x4xf64>
+    %xu = tensor.cast %xc : tensor<8x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%xu) : (tensor<*xf64>) -> ()
 
     // Release the resources.
     bufferization.dealloc_tensor %x1 : tensor<8x2xf64, #CSC>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
index 228d64587659b..ceb345f26343e 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
@@ -3,7 +3,7 @@
 // DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/test.tns" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
@@ -56,6 +56,7 @@ module {
   }
 
   func.func private @getTensorFilename(index) -> (!Filename)
+  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
 
   //
   // Main driver that reads tensor from file and calls the sparse kernel.
@@ -80,18 +81,16 @@ module {
 
     // Print the result for verification.
     //
-    // CHECK: ( 6.25, 0, 0 )
-    // CHECK: ( 4.224, 6.21, 0 )
-    // CHECK: ( 0, 0, 15.455 )
-    // CHECK: ( 0, 0, 0 )
-    // CHECK: ( 0, 0, 0 )
-    // CHECK: ( 0, 0, 0 )
-    // CHECK: ( 7, 0, 0 )
+    // CHECK: {{\[}}[6.25, 0, 0],
+    // CHECK-NEXT: [4.224, 6.21, 0],
+    // CHECK-NEXT: [0, 0, 15.455],
+    // CHECK-NEXT: [0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0],
+    // CHECK-NEXT: [7, 0, 0]]
     //
-    scf.for %i = %c0 to %c7 step %c1 {
-      %v = vector.transfer_read %0[%i, %c0], %d0: tensor<7x3xf64>, vector<3xf64>
-      vector.print %v : vector<3xf64>
-    }
+    %1 = tensor.cast %0 : tensor<7x3xf64> to tensor<*xf64>
+    call @printMemrefF64(%1) : (tensor<*xf64>) -> ()
 
     // Release the resources.
     bufferization.dealloc_tensor %a : tensor<7x3x3x3x3x3x5x3xf64, #SparseTensor>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
index aea38b5905d14..5bbeb1d6d6b35 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
@@ -2,13 +2,13 @@
 // DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
 //
 // Do the same run, but now with direct IR generation.
-// REDEFINE: %{option} = enable-runtime-library=false
+// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true"
 // RUN: %{command}
 //
 // Do the same run, but now with parallelization strategy.
@@ -16,7 +16,7 @@
 // RUN: %{command}
 //
 // Do the same run, but now with direct IR generation and parallelization strategy.
-// REDEFINE: %{option} = "enable-runtime-library=false parallelization-strategy=any-storage-any-loop"
+// REDEFINE: %{option} = "enable-runtime-library=false enable-buffer-initialization=true parallelization-strategy=any-storage-any-loop"
 // RUN: %{command}
 
 #CSR = #sparse_tensor.encoding<{
@@ -30,6 +30,9 @@
 }>
 
 module {
+  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
+  func.func private @printMemref1dF64(%ptr : memref<?xf64>) attributes { llvm.emit_c_interface }
+
   //
   // Computes C = A x B with all matrices dense.
   //
@@ -70,7 +73,6 @@ module {
   //
   func.func @entry() {
     %c0 = arith.constant 0 : index
-    %d1 = arith.constant -1.0 : f64
 
     // Initialize various matrices, dense for stress testing,
     // and sparse to verify correct nonzero structure.
@@ -178,95 +180,102 @@ module {
                                                tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
 
     //
-    // CHECK:      ( ( 388.76, 425.56, 462.36, 499.16 ),
-    // CHECK-SAME:   ( 397.12, 434.72, 472.32, 509.92 ),
-    // CHECK-SAME:   ( 405.48, 443.88, 482.28, 520.68 ),
-    // CHECK-SAME:   ( 413.84, 453.04, 492.24, 531.44 ) )
+    // CHECK: {{\[}}[388.76, 425.56, 462.36, 499.16],
+    // CHECK-NEXT: [397.12, 434.72, 472.32, 509.92],
+    // CHECK-NEXT: [405.48, 443.88, 482.28, 520.68],
+    // CHECK-NEXT: [413.84, 453.04, 492.24, 531.44]]
     //
-    %v0 = vector.transfer_read %0[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v0 : vector<4x4xf64>
+    %u0 = tensor.cast %0 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%u0) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK:      ( ( 388.76, 425.56, 462.36, 499.16 ),
-    // CHECK-SAME:   ( 397.12, 434.72, 472.32, 509.92 ),
-    // CHECK-SAME:   ( 405.48, 443.88, 482.28, 520.68 ),
-    // CHECK-SAME:   ( 413.84, 453.04, 492.24, 531.44 ) )
+    // CHECK: {{\[}}[388.76, 425.56, 462.36, 499.16],
+    // CHECK-NEXT: [397.12, 434.72, 472.32, 509.92],
+    // CHECK-NEXT: [405.48, 443.88, 482.28, 520.68],
+    // CHECK-NEXT: [413.84, 453.04, 492.24, 531.44]]
     //
     %c1 = sparse_tensor.convert %1 : tensor<4x4xf64, #CSR> to tensor<4x4xf64>
-    %v1 = vector.transfer_read %c1[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v1 : vector<4x4xf64>
+    %c1u = tensor.cast %c1 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%c1u) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK:      ( ( 388.76, 425.56, 462.36, 499.16 ),
-    // CHECK-SAME:   ( 397.12, 434.72, 472.32, 509.92 ),
-    // CHECK-SAME:   ( 405.48, 443.88, 482.28, 520.68 ),
-    // CHECK-SAME:   ( 413.84, 453.04, 492.24, 531.44 ) )
+    // CHECK: {{\[}}[388.76, 425.56, 462.36, 499.16],
+    // CHECK-NEXT: [397.12, 434.72, 472.32, 509.92],
+    // CHECK-NEXT: [405.48, 443.88, 482.28, 520.68],
+    // CHECK-NEXT: [413.84, 453.04, 492.24, 531.44]]
     //
     %c2 = sparse_tensor.convert %2 : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
-    %v2 = vector.transfer_read %c2[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v2 : vector<4x4xf64>
+    %c2u = tensor.cast %c2 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%c2u) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK:      ( ( 86.08, 94.28, 102.48, 110.68 ),
-    // CHECK-SAME:   ( 0, 0, 0, 0 ),
-    // CHECK-SAME:   ( 23.46, 25.76, 28.06, 30.36 ),
-    // CHECK-SAME:   ( 10.8, 11.8, 12.8, 13.8 ) )
+    // CHECK: {{\[}}[86.08, 94.28, 102.48, 110.68],
+    // CHECK-NEXT: [0, 0, 0, 0],
+    // CHECK-NEXT: [23.46, 25.76, 28.06, 30.36],
+    // CHECK-NEXT: [10.8, 11.8, 12.8, 13.8]]
     //
-    %v3 = vector.transfer_read %3[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v3 : vector<4x4xf64>
+    %u3 = tensor.cast %3 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%u3) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK:      ( ( 86.08, 94.28, 102.48, 110.68 ),
-    // CHECK-SAME:   ( 0, 0, 0, 0 ),
-    // CHECK-SAME:   ( 23.46, 25.76, 28.06, 30.36 ),
-    // CHECK-SAME:   ( 10.8, 11.8, 12.8, 13.8 ) )
+    // CHECK: {{\[}}[86.08, 94.28, 102.48, 110.68],
+    // CHECK-NEXT: [0, 0, 0, 0],
+    // CHECK-NEXT: [23.46, 25.76, 28.06, 30.36],
+    // CHECK-NEXT: [10.8, 11.8, 12.8, 13.8]]
    //
     %c4 = sparse_tensor.convert %4 : tensor<4x4xf64, #CSR> to tensor<4x4xf64>
-    %v4 = vector.transfer_read %c4[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v4 : vector<4x4xf64>
+    %c4u = tensor.cast %c4 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%c4u) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK:      ( ( 86.08, 94.28, 102.48, 110.68 ),
-    // CHECK-SAME:   ( 0, 0, 0, 0 ),
-    // CHECK-SAME:   ( 23.46, 25.76, 28.06, 30.36 ),
-    // CHECK-SAME:   ( 10.8, 11.8, 12.8, 13.8 ) )
+    // CHECK: {{\[}}[86.08, 94.28, 102.48, 110.68],
+    // CHECK-NEXT: [0, 0, 0, 0],
+    // CHECK-NEXT: [23.46, 25.76, 28.06, 30.36],
+    // CHECK-NEXT: [10.8, 11.8, 12.8, 13.8]]
     //
     %c5 = sparse_tensor.convert %5 : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
-    %v5 = vector.transfer_read %c5[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v5 : vector<4x4xf64>
+    %c5u = tensor.cast %c5 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%c5u) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK-NEXT: ( ( 0, 30.5, 4.2, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 4.6, 0 ), ( 0, 0, 7, 8 ) )
+    // CHECK: {{\[}}[0, 30.5, 4.2, 0],
+    // CHECK-NEXT: [0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 4.6, 0],
+    // CHECK-NEXT: [0, 0, 7, 8]]
     //
-    %v6 = vector.transfer_read %6[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v6 : vector<4x4xf64>
+    %u6 = tensor.cast %6 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%u6) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK-NEXT: ( ( 0, 30.5, 4.2, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 4.6, 0 ), ( 0, 0, 7, 8 ) )
+    // CHECK: {{\[}}[0, 30.5, 4.2, 0],
+    // CHECK-NEXT: [0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 4.6, 0],
+    // CHECK-NEXT: [0, 0, 7, 8]]
     //
     %c7 = sparse_tensor.convert %7 : tensor<4x4xf64, #CSR> to tensor<4x4xf64>
-    %v7 = vector.transfer_read %c7[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v7 : vector<4x4xf64>
+    %c7u = tensor.cast %c7 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%c7u) : (tensor<*xf64>) -> ()
 
     //
-    // CHECK-NEXT: ( ( 0, 30.5, 4.2, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 4.6, 0 ), ( 0, 0, 7, 8 ) )
+    // CHECK: {{\[}}[0, 30.5, 4.2, 0],
+    // CHECK-NEXT: [0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 4.6, 0],
+    // CHECK-NEXT: [0, 0, 7, 8]]
     //
     %c8 = sparse_tensor.convert %8 : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
-    %v8 = vector.transfer_read %c8[%c0, %c0], %d1 : tensor<4x4xf64>, vector<4x4xf64>
-    vector.print %v8 : vector<4x4xf64>
+    %c8u = tensor.cast %c8 : tensor<4x4xf64> to tensor<*xf64>
+    call @printMemrefF64(%c8u) : (tensor<*xf64>) -> ()
 
     //
     // Sanity check on nonzeros.
     //
-    // CHECK-NEXT: ( 30.5, 4.2, 4.6, 7, 8 )
-    // CHECK-NEXT: ( 30.5, 4.2, 4.6, 7, 8 )
+    // CHECK: [30.5, 4.2, 4.6, 7, 8
+    // CHECK: [30.5, 4.2, 4.6, 7, 8
     //
     %val7 = sparse_tensor.values %7 : tensor<4x4xf64, #CSR> to memref<?xf64>
     %val8 = sparse_tensor.values %8 : tensor<4x4xf64, #DCSR> to memref<?xf64>
-    %nz7 = vector.transfer_read %val7[%c0], %d1 : memref<?xf64>, vector<5xf64>
-    %nz8 = vector.transfer_read %val8[%c0], %d1 : memref<?xf64>, vector<5xf64>
-    vector.print %nz7 : vector<5xf64>
-    vector.print %nz8 : vector<5xf64>
+    call @printMemref1dF64(%val7) : (memref<?xf64>) -> ()
+    call @printMemref1dF64(%val8) : (memref<?xf64>) -> ()
 
     //
     // Sanity check on stored entries after the computations.
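A note on the reshaped FileCheck patterns (an inference from the output format of
the runner-utils printers, not something the patch states): printMemrefF64 emits a
descriptor header before the data, roughly

    Unranked Memref base@ = 0x... rank = 2 offset = 0 sizes = [4, 4] strides = [4, 1] data =
    [[388.76, 425.56, 462.36, 499.16],
     [397.12, 434.72, 472.32, 509.92],
     ...

so each dump is matched with a fresh CHECK rather than a CHECK-NEXT chained to the
previous print, and each matrix row gets its own CHECK-NEXT line where the old
single-line vector.print patterns used CHECK-SAME. The leading bracket is written
{{\[}} because a bare [[ would open a FileCheck variable capture. And since
FileCheck matches substrings, several values-array patterns drop their closing
bracket, e.g. [1, 2, 3, 4, 5, 6, 7 in sparse_conversion_dyn.mlir above: the old
pattern pinned a padded trailing 0 that the new printer does not produce.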
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
index 79f91f0dca1aa..374216138c8d6 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
@@ -2,7 +2,7 @@
 // DEFINE: %{command} = mlir-opt %s --sparse-compiler=%{option} | \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
@@ -42,6 +42,8 @@
 }
 
 module {
+  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
+
   // Scales a sparse matrix into a new sparse matrix.
   func.func @matrix_scale(%arga: tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR> {
     %s = arith.constant 2.0 : f64
@@ -110,11 +112,9 @@ module {
 
   // Dump a sparse matrix.
   func.func @dump(%arg0: tensor<?x?xf64, #DCSR>) {
-    %d0 = arith.constant 0.0 : f64
-    %c0 = arith.constant 0 : index
     %dm = sparse_tensor.convert %arg0 : tensor<?x?xf64, #DCSR> to tensor<?x?xf64>
-    %1 = vector.transfer_read %dm[%c0, %c0], %d0: tensor<?x?xf64>, vector<4x8xf64>
-    vector.print %1 : vector<4x8xf64>
+    %u = tensor.cast %dm : tensor<?x?xf64> to tensor<*xf64>
+    call @printMemrefF64(%u) : (tensor<*xf64>) -> ()
     return
   }
 
@@ -150,12 +150,30 @@ module {
     //
     // Verify the results.
     //
-    // CHECK:      ( ( 1, 2, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 3 ), ( 0, 0, 4, 0, 5, 0, 0, 6 ), ( 7, 0, 8, 9, 0, 0, 0, 0 ) )
-    // CHECK-NEXT: ( ( 6, 0, 0, 0, 0, 0, 0, 5 ), ( 4, 0, 0, 0, 0, 0, 3, 0 ), ( 0, 2, 0, 0, 0, 0, 0, 1 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) )
-    // CHECK-NEXT: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) )
-    // CHECK-NEXT: ( ( 2, 4, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 6 ), ( 0, 0, 8, 0, 10, 0, 0, 12 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) )
-    // CHECK-NEXT: ( ( 8, 4, 0, 0, 0, 0, 0, 5 ), ( 4, 0, 0, 0, 0, 0, 3, 6 ), ( 0, 2, 8, 0, 10, 0, 0, 13 ), ( 14, 0, 16, 18, 0, 0, 0, 0 ) )
-    // CHECK-NEXT: ( ( 12, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ), ( 0, 0, 0, 0, 0, 0, 0, 12 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) )
+    // CHECK: {{\[}}[1, 2, 0, 0, 0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 3],
+    // CHECK-NEXT: [0, 0, 4, 0, 5, 0, 0, 6],
+    // CHECK-NEXT: [7, 0, 8, 9, 0, 0, 0, 0]]
+    // CHECK: {{\[}}[6, 0, 0, 0, 0, 0, 0, 5],
+    // CHECK-NEXT: [4, 0, 0, 0, 0, 0, 3, 0],
+    // CHECK-NEXT: [0, 2, 0, 0, 0, 0, 0, 1],
+    // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]]
+    // CHECK: {{\[}}[2, 4, 0, 0, 0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 6],
+    // CHECK-NEXT: [0, 0, 8, 0, 10, 0, 0, 12],
+    // CHECK-NEXT: [14, 0, 16, 18, 0, 0, 0, 0]]
+    // CHECK: {{\[}}[2, 4, 0, 0, 0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 6],
+    // CHECK-NEXT: [0, 0, 8, 0, 10, 0, 0, 12],
+    // CHECK-NEXT: [14, 0, 16, 18, 0, 0, 0, 0]]
+    // CHECK: {{\[}}[8, 4, 0, 0, 0, 0, 0, 5],
+    // CHECK-NEXT: [4, 0, 0, 0, 0, 0, 3, 6],
+    // CHECK-NEXT: [0, 2, 8, 0, 10, 0, 0, 13],
+    // CHECK-NEXT: [14, 0, 16, 18, 0, 0, 0, 0]]
+    // CHECK: {{\[}}[12, 0, 0, 0, 0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0],
+    // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 12],
+    // CHECK-NEXT: [0, 0, 0, 0, 0, 0, 0, 0]]
     //
     call @dump(%sm1) : (tensor<?x?xf64, #DCSR>) -> ()
     call @dump(%sm2) : (tensor<?x?xf64, #DCSR>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
index 74b3b2c921049..b348c561a6349 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
@@ -3,7 +3,7 @@
 // DEFINE: TENSOR0="%mlir_src_dir/test/Integration/data/mttkrp_b.tns" \
 // DEFINE: mlir-cpu-runner \
 // DEFINE:  -e entry -entry-point-result=void \
-// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// DEFINE:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // DEFINE: FileCheck %s
 //
 // RUN: %{command}
@@ -35,6 +35,8 @@
 // from file, and runs the resulting code with the JIT compiler.
 //
 module {
+  func.func private @printMemrefF64(%ptr : tensor<*xf64>)
+
   //
   // Computes Matricized Tensor Times Khatri-Rao Product (MTTKRP) kernel. See
   // http://tensor-compiler.org/docs/data_analytics/index.html.
@@ -112,12 +114,11 @@ module {
 
     // Print the result for verification.
     //
-    // CHECK: ( ( 16075, 21930, 28505, 35800, 43815 ),
-    // CHECK:   ( 10000, 14225, 19180, 24865, 31280 ) )
+    // CHECK: {{\[}}[16075, 21930, 28505, 35800, 43815],
+    // CHECK-NEXT: [10000, 14225, 19180, 24865, 31280]]
     //
-    %v = vector.transfer_read %0[%cst0, %cst0], %f0
-          : tensor<?x?xf64>, vector<2x5xf64>
-    vector.print %v : vector<2x5xf64>
+    %u = tensor.cast %0: tensor<?x?xf64> to tensor<*xf64>
+    call @printMemrefF64(%u) : (tensor<*xf64>) -> ()
 
     // Release the resources.
     bufferization.dealloc_tensor %b : tensor<?x?x?xf64, #SparseTensor>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir
index f0937e238af58..c9ee528735cf5 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_rewrite_sort.mlir
@@ -1,10 +1,12 @@
 // RUN: mlir-opt %s --sparse-compiler=enable-runtime-library=false | \
 // RUN: mlir-cpu-runner \
 // RUN:  -e entry -entry-point-result=void \
-// RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
+// RUN:  -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext,%mlir_lib_dir/libmlir_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
 module {
+  func.func private @printMemref1dI32(%ptr : memref<?xi32>) attributes { llvm.emit_c_interface }
+
   // Stores 5 values to the memref buffer.
   func.func @storeValuesTo(%b: memref<?xi32>, %v0: i32, %v1: i32, %v2: i32,
                            %v3: i32, %v4: i32) -> () {
@@ -47,28 +49,24 @@ module {
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
 
     // Sort 0 elements.
-    // CHECK: ( 10, 2, 0, 5, 1 )
+    // CHECK: [10, 2, 0, 5, 1]
     sparse_tensor.sort %i0, %x0 : memref<?xi32>
-    %x0v0 = vector.transfer_read %x0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x0v0 : vector<5xi32>
+    call @printMemref1dI32(%x0) : (memref<?xi32>) -> ()
     // Stable sort.
-    // CHECK: ( 10, 2, 0, 5, 1 )
+    // CHECK: [10, 2, 0, 5, 1]
     sparse_tensor.sort stable %i0, %x0 : memref<?xi32>
-    %x0v0s = vector.transfer_read %x0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x0v0s : vector<5xi32>
+    call @printMemref1dI32(%x0) : (memref<?xi32>) -> ()
 
     // Sort the first 4 elements, with the last valid value untouched.
-    // CHECK: ( 0, 2, 5, 10, 1 )
+    // CHECK: [0, 2, 5, 10, 1]
     sparse_tensor.sort %i4, %x0 : memref<?xi32>
-    %x0v1 = vector.transfer_read %x0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x0v1 : vector<5xi32>
+    call @printMemref1dI32(%x0) : (memref<?xi32>) -> ()
     // Stable sort.
-    // CHECK: ( 0, 2, 5, 10, 1 )
+    // CHECK: [0, 2, 5, 10, 1]
     call @storeValuesTo(%x0, %c10, %c2, %c0, %c5, %c1)
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     sparse_tensor.sort stable %i4, %x0 : memref<?xi32>
-    %x0v1s = vector.transfer_read %x0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x0v1s : vector<5xi32>
+    call @printMemref1dI32(%x0) : (memref<?xi32>) -> ()
 
     // Prepare more buffers of different dimensions.
     %x1s = memref.alloc() : memref<10xi32>
@@ -79,10 +77,10 @@ module {
     %y0 = memref.cast %y0s : memref<7xi32> to memref<?xi32>
 
     // Sort "parallel arrays".
-    // CHECK: ( 1, 1, 2, 5, 10 )
-    // CHECK: ( 3, 3, 1, 10, 1 )
-    // CHECK: ( 9, 9, 4, 7, 2 )
-    // CHECK: ( 8, 7, 10, 9, 6 )
+    // CHECK: [1, 1, 2, 5, 10]
+    // CHECK: [3, 3, 1, 10, 1
+    // CHECK: [9, 9, 4, 7, 2
+    // CHECK: [8, 7, 10, 9, 6
     call @storeValuesTo(%x0, %c10, %c2, %c1, %c5, %c1)
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     call @storeValuesTo(%x1, %c1, %c1, %c3, %c10, %c3)
@@ -93,19 +91,15 @@ module {
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     call @storeValuesTo(%x2, %c9, %c9, %c4, %c7, %c2)
      : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     call @storeValuesTo(%y0, %c6, %c10, %c8, %c9, %c7)
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     sparse_tensor.sort %i5, %x0, %x1, %x2 jointly %y0
       : memref<?xi32>, memref<?xi32>, memref<?xi32> jointly memref<?xi32>
-    %x0v2 = vector.transfer_read %x0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x0v2 : vector<5xi32>
-    %x1v = vector.transfer_read %x1[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x1v : vector<5xi32>
-    %x2v = vector.transfer_read %x2[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x2v : vector<5xi32>
-    %y0v = vector.transfer_read %y0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %y0v : vector<5xi32>
+    call @printMemref1dI32(%x0) : (memref<?xi32>) -> ()
+    call @printMemref1dI32(%x1) : (memref<?xi32>) -> ()
+    call @printMemref1dI32(%x2) : (memref<?xi32>) -> ()
+    call @printMemref1dI32(%y0) : (memref<?xi32>) -> ()
     // Stable sort.
-    // CHECK: ( 1, 1, 2, 5, 10 )
-    // CHECK: ( 3, 3, 1, 10, 1 )
-    // CHECK: ( 9, 9, 4, 7, 2 )
-    // CHECK: ( 8, 7, 10, 9, 6 )
+    // CHECK: [1, 1, 2, 5, 10]
+    // CHECK: [3, 3, 1, 10, 1
+    // CHECK: [9, 9, 4, 7, 2
+    // CHECK: [8, 7, 10, 9, 6
     call @storeValuesTo(%x0, %c10, %c2, %c1, %c5, %c1)
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     call @storeValuesTo(%x1, %c1, %c1, %c3, %c10, %c3)
@@ -116,14 +110,10 @@
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     call @storeValuesTo(%x2, %c9, %c9, %c4, %c7, %c2)
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     call @storeValuesTo(%y0, %c6, %c10, %c8, %c9, %c7)
       : (memref<?xi32>, i32, i32, i32, i32, i32) -> ()
     sparse_tensor.sort stable %i5, %x0, %x1, %x2 jointly %y0
       : memref<?xi32>, memref<?xi32>, memref<?xi32> jointly memref<?xi32>
-    %x0v2s = vector.transfer_read %x0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x0v2s : vector<5xi32>
-    %x1vs = vector.transfer_read %x1[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x1vs : vector<5xi32>
-    %x2vs = vector.transfer_read %x2[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %x2vs : vector<5xi32>
-    %y0vs = vector.transfer_read %y0[%i0], %c100: memref<?xi32>, vector<5xi32>
-    vector.print %y0vs : vector<5xi32>
+    call @printMemref1dI32(%x0) : (memref<?xi32>) -> ()
+    call @printMemref1dI32(%x1) : (memref<?xi32>) -> ()
+    call @printMemref1dI32(%x2) : (memref<?xi32>) -> ()
+    call @printMemref1dI32(%y0) : (memref<?xi32>) -> ()
 
     // Release the buffers.
     memref.dealloc %x0 : memref<?xi32>