diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
index 5d67f3d363a88..a8c96e0e97187 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
@@ -35,6 +35,11 @@
   doc = "X(i,j) = A(i,j) OP B(i,j)"
 }
 
+//
+// Test cases for the sparse_tensor.binary operator, covering the different
+// combinations of empty, identity, and custom left/right/overlap regions.
+//
+
 module {
   // Creates a new sparse vector using the minimum values from two input sparse vectors.
   // When there is no overlap, include the present value in the output.
@@ -158,7 +163,187 @@ module {
     return %0 : tensor<?x?xf64, #DCSR>
   }
 
-  // Dumps a sparse vector of type f64.
+  // Element-wise matrix addition written as a semi-ring binary operation;
+  // entries present in only one operand are forwarded via `identity`.
+  func.func @add_tensor_1(%A: tensor<4x4xf64, #DCSR>,
+                          %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
+    %sum = linalg.generic #trait_mat_op
+      ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
+      outs(%init: tensor<4x4xf64, #DCSR>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={
+              ^bb0(%l: f64, %r: f64):
+                %both = arith.addf %l, %r : f64
+                sparse_tensor.yield %both : f64
+            }
+            left=identity
+            right=identity
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #DCSR>
+    return %sum : tensor<4x4xf64, #DCSR>
+  }
+
+  // Same kernel as @add_tensor_1, except that the left/right regions forward
+  // their operand with an explicit sparse_tensor.yield instead of `identity`.
+  func.func @add_tensor_2(%A: tensor<4x4xf64, #DCSR>,
+                          %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
+    %sum = linalg.generic #trait_mat_op
+      ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
+      outs(%init: tensor<4x4xf64, #DCSR>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={
+              ^bb0(%l: f64, %r: f64):
+                %both = arith.addf %l, %r : f64
+                sparse_tensor.yield %both : f64
+            }
+            left={
+              ^bb0(%l: f64):
+                sparse_tensor.yield %l : f64
+            }
+            right={
+              ^bb0(%r: f64):
+                sparse_tensor.yield %r : f64
+            }
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #DCSR>
+    return %sum : tensor<4x4xf64, #DCSR>
+  }
+  
+  // Triangular add/sub (semi-ring binary op): computes A + B on and above the
+  // diagonal and A - B below it, equivalent to treating an absent operand as zero.
+  func.func @triangular(%A: tensor<4x4xf64, #DCSR>,
+                        %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
+    %tri = linalg.generic #trait_mat_op
+      ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
+      outs(%init: tensor<4x4xf64, #DCSR>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %i = linalg.index 0 : index
+          %j = linalg.index 1 : index
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={
+              ^bb0(%l: f64, %r: f64):
+                %upper = arith.cmpi "uge", %j, %i : index
+                %added = arith.addf %l, %r : f64
+                %subbed = arith.subf %l, %r : f64
+                %pick = arith.select %upper, %added, %subbed : f64
+                sparse_tensor.yield %pick : f64
+            }
+            left=identity
+            right={
+              ^bb0(%r: f64):
+                %upper = arith.cmpi "uge", %j, %i : index
+                %negated = arith.negf %r : f64
+                %pick = arith.select %upper, %r, %negated : f64
+                sparse_tensor.yield %pick : f64
+            }
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #DCSR>
+    return %tri : tensor<4x4xf64, #DCSR>
+  }
+
+  // Performs subtraction (semi-ring binary op), clamping results to constant bounds.
+  func.func @sub_with_thres(%A: tensor<4x4xf64, #DCSR>,
+                            %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
+    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
+    // Defines out-block constant bounds: left/right-only entries clamp to [-2, 2].
+    %thres_out_up = arith.constant 2.0 : f64
+    %thres_out_lo = arith.constant -2.0 : f64
+
+    %0 = linalg.generic #trait_mat_op
+      ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
+      outs(%C: tensor<4x4xf64, #DCSR>) {
+        ^bb0(%a: f64, %b: f64, %c: f64) :
+          %result = sparse_tensor.binary %a, %b : f64, f64 to f64
+            overlap={
+              ^bb0(%x: f64, %y: f64):
+                // Defines in-block constant bounds: overlapping entries clamp to [-1, 1].
+                %thres_up = arith.constant 1.0 : f64
+                %thres_lo = arith.constant -1.0 : f64
+                %diff = arith.subf %x, %y : f64
+                %cmp = arith.cmpf "oge", %diff, %thres_up : f64
+                %tmp = arith.select %cmp, %thres_up, %diff : f64
+                %cmp1 = arith.cmpf "ole", %tmp, %thres_lo : f64
+                %ret = arith.select %cmp1, %thres_lo, %tmp : f64
+                sparse_tensor.yield %ret : f64
+            }
+            left={
+              ^bb0(%x: f64):
+                // Uses out-block constant bounds.
+                %cmp = arith.cmpf "oge", %x, %thres_out_up : f64
+                %tmp = arith.select %cmp, %thres_out_up, %x : f64
+                %cmp1 = arith.cmpf "ole", %tmp, %thres_out_lo : f64
+                %ret = arith.select %cmp1, %thres_out_lo, %tmp : f64
+                sparse_tensor.yield %ret : f64
+            }
+            right={
+              ^bb0(%y: f64):
+                // Only B is present, so the difference is -y; uses out-block constant bounds.
+                %ny = arith.negf %y : f64
+                %cmp = arith.cmpf "oge", %ny, %thres_out_up : f64
+                %tmp = arith.select %cmp, %thres_out_up, %ny : f64
+                %cmp1 = arith.cmpf "ole", %tmp, %thres_out_lo : f64
+                %ret = arith.select %cmp1, %thres_out_lo, %tmp : f64
+                sparse_tensor.yield %ret : f64
+            }
+          linalg.yield %result : f64
+      } -> tensor<4x4xf64, #DCSR>
+    return %0 : tensor<4x4xf64, #DCSR>
+  }
+
+  // Equality test restricted to the intersection: yields 1 (as i8) where the
+  // overlapping values compare equal and 0 otherwise; non-overlap is dropped.
+  func.func @intersect_equal(%A: tensor<4x4xf64, #DCSR>,
+                             %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xi8, #DCSR>
+    %eq = linalg.generic #trait_mat_op
+      ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
+      outs(%init: tensor<4x4xi8, #DCSR>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: i8) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to i8
+            overlap={
+              ^bb0(%l: f64, %r: f64):
+                %same = arith.cmpf "oeq", %l, %r : f64
+                %flag = arith.extui %same : i1 to i8
+                sparse_tensor.yield %flag : i8
+            }
+            left={}
+            right={}
+          linalg.yield %elem : i8
+      } -> tensor<4x4xi8, #DCSR>
+    return %eq : tensor<4x4xi8, #DCSR>
+  }
+
+  // Keeps the values present only in A, negates the values present only in B,
+  // and drops every entry where A and B overlap.
+  func.func @only_left_right(%A: tensor<4x4xf64, #DCSR>,
+                             %B: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> {
+    %init = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
+    %res = linalg.generic #trait_mat_op
+      ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
+      outs(%init: tensor<4x4xf64, #DCSR>) {
+        ^bb0(%lhs: f64, %rhs: f64, %out: f64) :
+          %elem = sparse_tensor.binary %lhs, %rhs : f64, f64 to f64
+            overlap={}
+            left=identity
+            right={
+              ^bb0(%r: f64):
+                %negated = arith.negf %r : f64
+                sparse_tensor.yield %negated : f64
+            }
+          linalg.yield %elem : f64
+      } -> tensor<4x4xf64, #DCSR>
+    return %res : tensor<4x4xf64, #DCSR>
+  }
+  
+  //
+  // Utility functions to dump the value of a tensor.
+  //
+
   func.func @dump_vec(%arg0: tensor<?xf64, #SparseVector>) {
     // Dump the values array to verify only sparse contents are stored.
     %c0 = arith.constant 0 : index
@@ -175,7 +360,6 @@ module {
     return
   }
 
-  // Dumps a sparse vector of type i32.
   func.func @dump_vec_i32(%arg0: tensor<?xi32, #SparseVector>) {
     // Dump the values array to verify only sparse contents are stored.
     %c0 = arith.constant 0 : index
@@ -192,7 +376,6 @@ module {
     return
   }
 
-  // Dump a sparse matrix.
   func.func @dump_mat(%arg0: tensor<?x?xf64, #DCSR>) {
     %d0 = arith.constant 0.0 : f64
     %c0 = arith.constant 0 : index
@@ -204,7 +387,43 @@ module {
     return
   }
 
-  // Driver method to call and verify vector kernels.
+  // Prints a 4x4 f64 DCSR matrix twice: as a dense matrix and as its stored values.
+  func.func @dump_mat_4x4(%A: tensor<4x4xf64, #DCSR>) {
+    %zero = arith.constant 0 : index
+    %pad = arith.constant -1.0 : f64
+    // Print the matrix as a dense 4x4 vector.
+    %dense = sparse_tensor.convert %A : tensor<4x4xf64, #DCSR> to tensor<4x4xf64>
+    %buf = bufferization.to_memref %dense : memref<4x4xf64>
+    %mat = vector.transfer_read %buf[%zero, %zero], %pad: memref<4x4xf64>, vector<4x4xf64>
+    vector.print %mat : vector<4x4xf64>
+    // Print the stored values array, read as 16 entries with -1 as padding.
+    %vals = sparse_tensor.values %A : tensor<4x4xf64, #DCSR> to memref<?xf64>
+    %stored = vector.transfer_read %vals[%zero], %pad: memref<?xf64>, vector<16xf64>
+    vector.print %stored : vector<16xf64>
+    // Release the buffer holding the dense copy.
+    memref.dealloc %buf : memref<4x4xf64>
+    return
+  }
+
+  // Prints a 4x4 i8 DCSR matrix twice: as a dense matrix and as its stored values.
+  func.func @dump_mat_4x4_i8(%A: tensor<4x4xi8, #DCSR>) {
+    %zero = arith.constant 0 : index
+    %pad = arith.constant -1 : i8
+    // Print the matrix as a dense 4x4 vector.
+    %dense = sparse_tensor.convert %A : tensor<4x4xi8, #DCSR> to tensor<4x4xi8>
+    %buf = bufferization.to_memref %dense : memref<4x4xi8>
+    %mat = vector.transfer_read %buf[%zero, %zero], %pad: memref<4x4xi8>, vector<4x4xi8>
+    vector.print %mat : vector<4x4xi8>
+    // Print the stored values array, read as 16 entries with -1 as padding.
+    %vals = sparse_tensor.values %A : tensor<4x4xi8, #DCSR> to memref<?xi8>
+    %stored = vector.transfer_read %vals[%zero], %pad: memref<?xi8>, vector<16xi8>
+    vector.print %stored : vector<16xi8>
+    // Release the buffer holding the dense copy.
+    memref.dealloc %buf : memref<4x4xi8>
+    return
+  }
+  
+  // Driver method to call and verify kernels.
   func.func @entry() {
     %c0 = arith.constant 0 : index
 
@@ -237,6 +456,20 @@ module {
     %sm1 = sparse_tensor.convert %m1 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>
     %sm2 = sparse_tensor.convert %m2 : tensor<4x8xf64> to tensor<?x?xf64, #DCSR>
 
+    %m3 = arith.constant dense<
+      [ [ 1.0, 0.0, 3.0, 0.0],
+        [ 0.0, 2.0, 0.0, 0.0],
+        [ 0.0, 0.0, 0.0, 4.0],
+        [ 3.0, 4.0, 0.0, 0.0] ]> : tensor<4x4xf64>
+    %m4 = arith.constant dense<
+      [ [ 1.0, 0.0, 1.0, 1.0],
+        [ 0.0, 0.5, 0.0, 0.0],
+        [ 1.0, 5.0, 2.0, 0.0],
+        [ 2.0, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64>
+
+    %sm3 = sparse_tensor.convert %m3 : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>
+    %sm4 = sparse_tensor.convert %m4 : tensor<4x4xf64> to tensor<4x4xf64, #DCSR>
+
     // Call sparse vector kernels.
     %0 = call @vector_min(%sv1, %sv2)
        : (tensor<?xf64, #SparseVector>,
@@ -253,6 +486,18 @@ module {
     // Call sparse matrix kernels.
     %5 = call @matrix_intersect(%sm1, %sm2)
       : (tensor<?x?xf64, #DCSR>, tensor<?x?xf64, #DCSR>) -> tensor<?x?xf64, #DCSR>
+    %6 = call @add_tensor_1(%sm3, %sm4)
+      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+    %7 = call @add_tensor_2(%sm3, %sm4)
+      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>    
+    %8 = call @triangular(%sm3, %sm4)
+      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+    %9 = call @sub_with_thres(%sm3, %sm4)
+      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+    %10 = call @intersect_equal(%sm3, %sm4)
+      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xi8, #DCSR>
+    %11 = call @only_left_right(%sm3, %sm4)
+      : (tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
 
     //
     // Verify the results.
@@ -270,6 +515,18 @@ module {
     // CHECK-NEXT: ( 0, 3, 11, 17, 20, 21, 28, 29, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
     // CHECK-NEXT: ( 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 17, 0, 0, 20, 21, 0, 0, 0, 0, 0, 0, 28, 29, 0, 31 )
     // CHECK-NEXT: ( ( 7, 0, 0, 0, 0, 0, 0, -5 ), ( -4, 0, 0, 0, 0, 0, -3, 0 ), ( 0, -2, 0, 0, 0, 0, 0, 7 ), ( 0, 0, 0, 0, 0, 0, 0, 0 ) )
+    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
+    // CHECK-NEXT:   ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, -1, -1, -1, -1, -1, -1 )
+    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( 1, 5, 2, 4 ), ( 5, 4, 0, 0 ) )
+    // CHECK-NEXT:   ( 2, 4, 1, 2.5, 1, 5, 2, 4, 5, 4, -1, -1, -1, -1, -1, -1 )
+    // CHECK-NEXT: ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( -1, -5, 2, 4 ), ( 1, 4, 0, 0 ) )
+    // CHECK-NEXT:   ( 2, 4, 1, 2.5, -1, -5, 2, 4, 1, 4, -1, -1, -1, -1, -1, -1 )
+    // CHECK-NEXT: ( ( 0, 0, 1, -1 ), ( 0, 1, 0, 0 ), ( -1, -2, -2, 2 ), ( 1, 2, 0, 0 ) )
+    // CHECK-NEXT:   ( 0, 1, -1, 1, -1, -2, -2, 2, 1, 2, -1, -1, -1, -1, -1, -1 )
+    // CHECK-NEXT: ( ( 1, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) )
+    // CHECK-NEXT:   ( 1, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
+    // CHECK-NEXT: ( ( 0, 0, 0, -1 ), ( 0, 0, 0, 0 ), ( -1, -5, -2, 4 ), ( 0, 4, 0, 0 ) )
+    // CHECK-NEXT:   ( -1, -1, -5, -2, 4, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 )
     //
     call @dump_vec(%sv1) : (tensor<?xf64, #SparseVector>) -> ()
     call @dump_vec(%sv2) : (tensor<?xf64, #SparseVector>) -> ()
@@ -278,17 +535,31 @@ module {
     call @dump_vec(%2) : (tensor<?xf64, #SparseVector>) -> ()
     call @dump_vec_i32(%3) : (tensor<?xi32, #SparseVector>) -> ()
     call @dump_mat(%5) : (tensor<?x?xf64, #DCSR>) -> ()
-
+    call @dump_mat_4x4(%6) : (tensor<4x4xf64, #DCSR>) -> ()
+    call @dump_mat_4x4(%7) : (tensor<4x4xf64, #DCSR>) -> ()
+    call @dump_mat_4x4(%8) : (tensor<4x4xf64, #DCSR>) -> ()
+    call @dump_mat_4x4(%9) : (tensor<4x4xf64, #DCSR>) -> ()
+    call @dump_mat_4x4_i8(%10) : (tensor<4x4xi8, #DCSR>) -> ()
+    call @dump_mat_4x4(%11) : (tensor<4x4xf64, #DCSR>) -> ()
+    
     // Release the resources.
     sparse_tensor.release %sv1 : tensor<?xf64, #SparseVector>
     sparse_tensor.release %sv2 : tensor<?xf64, #SparseVector>
     sparse_tensor.release %sm1 : tensor<?x?xf64, #DCSR>
     sparse_tensor.release %sm2 : tensor<?x?xf64, #DCSR>
+    sparse_tensor.release %sm3 : tensor<4x4xf64, #DCSR>
+    sparse_tensor.release %sm4 : tensor<4x4xf64, #DCSR>
     sparse_tensor.release %0 : tensor<?xf64, #SparseVector>
     sparse_tensor.release %1 : tensor<?xf64, #SparseVector>
     sparse_tensor.release %2 : tensor<?xf64, #SparseVector>
     sparse_tensor.release %3 : tensor<?xi32, #SparseVector>
     sparse_tensor.release %5 : tensor<?x?xf64, #DCSR>
+    sparse_tensor.release %6 : tensor<4x4xf64, #DCSR>
+    sparse_tensor.release %7 : tensor<4x4xf64, #DCSR>
+    sparse_tensor.release %8 : tensor<4x4xf64, #DCSR>
+    sparse_tensor.release %9 : tensor<4x4xf64, #DCSR>
+    sparse_tensor.release %10 : tensor<4x4xi8, #DCSR>
+    sparse_tensor.release %11 : tensor<4x4xf64, #DCSR>
     return
   }
 }
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
deleted file mode 100644
index 468bc2f847f06..0000000000000
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_triangular_bin.mlir
+++ /dev/null
@@ -1,95 +0,0 @@
-// RUN: mlir-opt %s --sparse-compiler | \
-// RUN: mlir-cpu-runner \
-// RUN:  -e entry -entry-point-result=void  \
-// RUN:  -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
-// RUN: FileCheck %s
-
-#SparseMatrix = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>
-
-#trait_op = {
-  indexing_maps = [
-    affine_map<(i,j) -> (i,j)>, // A
-    affine_map<(i,j) -> (i,j)>, // B
-    affine_map<(i,j) -> (i,j)>  // X (out)
-  ],
-  iterator_types = ["parallel","parallel"],
-  doc = "X(i,j) = A(i,j) OP B(i,j)"
-}
-
-module {
-  // Performs triangular add/sub operation (using semi-ring binary op).
-  func.func @triangular(%A: tensor<4x4xf64, #SparseMatrix>,
-                        %B: tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix> {
-    %C = bufferization.alloc_tensor() : tensor<4x4xf64, #SparseMatrix>
-    %0 = linalg.generic #trait_op
-      ins(%A, %B: tensor<4x4xf64, #SparseMatrix>,
-                  tensor<4x4xf64, #SparseMatrix>)
-      outs(%C: tensor<4x4xf64, #SparseMatrix>) {
-        ^bb0(%a: f64, %b: f64, %c: f64) :
-          %row = linalg.index 0 : index
-          %col = linalg.index 1 : index
-          %result = sparse_tensor.binary %a, %b : f64, f64 to f64
-            overlap={
-              ^bb0(%x: f64, %y: f64):
-                %cmp = arith.cmpi "uge", %col, %row : index
-                %upperTriangleResult = arith.addf %x, %y : f64
-                %lowerTriangleResult = arith.subf %x, %y : f64
-                %ret = arith.select %cmp, %upperTriangleResult, %lowerTriangleResult : f64
-                sparse_tensor.yield %ret : f64
-            }
-            left=identity
-            right={
-              ^bb0(%y: f64):
-                %cmp = arith.cmpi "uge", %col, %row : index
-                %lowerTriangleResult = arith.negf %y : f64
-                %ret = arith.select %cmp, %y, %lowerTriangleResult : f64
-                sparse_tensor.yield %ret : f64
-            }
-          linalg.yield %result : f64
-      } -> tensor<4x4xf64, #SparseMatrix>
-    return %0 : tensor<4x4xf64, #SparseMatrix>
-  }
-
-  // Driver method to call and verify triangular kernel.
-  func.func @entry() {
-    %c0 = arith.constant 0 : index
-    %du = arith.constant -1.0 : f64
-
-    %am = arith.constant dense<
-      [ [ 1.0, 0.0, 3.0, 0.0],
-        [ 0.0, 2.0, 0.0, 0.0],
-        [ 0.0, 0.0, 0.0, 4.0],
-        [ 3.0, 4.0, 0.0, 0.0] ]> : tensor<4x4xf64>
-    %bm = arith.constant dense<
-      [ [ 1.0, 0.0, 1.0, 1.0],
-        [ 0.0, 0.5, 0.0, 0.0],
-        [ 1.0, 5.0, 2.0, 0.0],
-        [ 2.0, 0.0, 0.0, 0.0] ]> : tensor<4x4xf64>
-
-    %a = sparse_tensor.convert %am : tensor<4x4xf64> to tensor<4x4xf64, #SparseMatrix>
-    %b = sparse_tensor.convert %bm : tensor<4x4xf64> to tensor<4x4xf64, #SparseMatrix>
-    %0 = call @triangular(%a, %b) : (tensor<4x4xf64, #SparseMatrix>,
-                                     tensor<4x4xf64, #SparseMatrix>) -> tensor<4x4xf64, #SparseMatrix>
-
-    //
-    // Verify the results.
-    //
-    // CHECK:    ( ( 2, 0, 4, 1 ), ( 0, 2.5, 0, 0 ), ( -1, -5, 2, 4 ), ( 1, 4, 0, 0 ) )
-    // CHECK-NEXT: ( 2, 4, 1, 2.5, -1, -5, 2, 4, 1, 4, -1, -1, -1, -1, -1, -1 )
-    //
-    %c = sparse_tensor.convert %0 : tensor<4x4xf64, #SparseMatrix> to tensor<4x4xf64>
-    %m = bufferization.to_memref %c : memref<4x4xf64>
-    %v = vector.transfer_read %m[%c0, %c0], %du: memref<4x4xf64>, vector<4x4xf64>
-    vector.print %v : vector<4x4xf64>
-    %1 = sparse_tensor.values %0 : tensor<4x4xf64, #SparseMatrix> to memref<?xf64>
-    %2 = vector.transfer_read %1[%c0], %du: memref<?xf64>, vector<16xf64>
-    vector.print %2 : vector<16xf64>
-
-    // Release the resources.
-    memref.dealloc %m : memref<4x4xf64>
-    sparse_tensor.release %a : tensor<4x4xf64, #SparseMatrix>
-    sparse_tensor.release %b : tensor<4x4xf64, #SparseMatrix>
-    sparse_tensor.release %0 : tensor<4x4xf64, #SparseMatrix>
-    return
-  }
-}