[mlir][sparse] fix bug in workspace dimension computation
Access pattern expansion is always done along the innermost stored
dimension, but the dimension index was incorrectly reordered because it
was passed through a general size utility intended for original
(unpermuted) dimension indices only.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D133472
aartbik committed Sep 8, 2022
1 parent ac3b8df commit ec8f290
Showing 4 changed files with 213 additions and 12 deletions.
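To make the fix concrete, here is a minimal standalone sketch of the corrected computation. It deliberately uses a plain permutation vector in place of the encoding's dimOrdering affine map, so the function name and its vector-based signature are illustrative assumptions, not the actual MLIR API:

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

// Sketch of the fixed logic: `order` stands in for the dimOrdering affine
// map, with order[level] giving the original dimension stored at `level`.
// The workspace must span the innermost *stored* level, so that level is
// translated back to an original dimension before it is handed to a size
// utility that expects original dimension indices (and that re-applies the
// ordering itself, as the new code comment in the hunk below notes).
unsigned innermostOriginalDim(unsigned rank,
                              const std::vector<unsigned> &order) {
  unsigned innerLevel = rank - 1; // innermost stored level
  if (!order.empty())             // optional dimOrdering present
    return order[innerLevel];     // translate level -> original dimension
  return innerLevel;              // identity ordering (e.g., CSR)
}

int main() {
  // CSC-style ordering (i,j) -> (j,i): level 0 stores j, level 1 stores i.
  std::vector<unsigned> csc = {1, 0};
  assert(innermostOriginalDim(2, csc) == 0); // expand along original dim 0
  assert(innermostOriginalDim(2, {}) == 1);  // CSR: original dim 1
  std::printf("ok\n");
}
```

The committed code performs the same translation with AffineMap::getDimPosition, as shown in the first hunk below.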
9 changes: 7 additions & 2 deletions mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -1166,10 +1166,15 @@ class SparseTensorExpandConverter : public OpConversionPattern<ExpandOp> {
Type idxType = rewriter.getIndexType();
// All initialization should be done on entry of the loop nest.
rewriter.setInsertionPointAfter(op.getTensor().getDefiningOp());
- // Determine the size for access expansion.
+ // Determine the size for access expansion (always the innermost stored
+ // dimension size, but we need to translate it back to the original
+ // dimension since the dim size utility applies dimension ordering).
auto enc = getSparseTensorEncoding(srcType);
Value src = adaptor.getOperands()[0];
- Value sz = genDimSizeCall(rewriter, loc, enc, src, srcType.getRank() - 1);
+ unsigned innerDim = srcType.getRank() - 1;
+ if (AffineMap p = enc.getDimOrdering())
+   innerDim = p.getDimPosition(innerDim);
+ Value sz = genDimSizeCall(rewriter, loc, enc, src, innerDim);
// Allocate temporary buffers for values, filled-switch, and indices.
// We do not use stack buffers for this, since the expanded size may
// be rather large (as it envelops a single expanded dense dimension).
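Concretely, for a CSC-style encoding with dimOrdering (i,j) -> (j,i): the innermost stored level is rank - 1 = 1, and result 1 of the ordering map is i, so getDimPosition(1) returns original dimension 0 (the rows); genDimSizeCall then re-applies the ordering when it emits the runtime call, as the new comment above notes. The old code passed original dimension 1 directly, so for a non-square matrix the workspace was sized by the wrong (column) dimension.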
8 changes: 5 additions & 3 deletions mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -494,17 +494,19 @@ func.func @sparse_insert(%arg0: tensor<128xf32, #SparseVector>,
}

// CHECK-LABEL: func @sparse_expansion()
- // CHECK: %[[S:.*]] = call @sparseDimSize
+ // CHECK-DAG: %[[C:.*]] = arith.constant 1 : index
+ // CHECK: %[[N:.*]] = call @newSparseTensor
+ // CHECK: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C]]) : (!llvm.ptr<i8>, index) -> index
// CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
// CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
// CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
// CHECK: return %[[C]] : memref<?xindex>
func.func @sparse_expansion() -> memref<?xindex> {
- %0 = bufferization.alloc_tensor() : tensor<8x8xf64, #SparseMatrix>
+ %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #SparseMatrix>
%values, %filled, %added, %count = sparse_tensor.expand %0
- : tensor<8x8xf64, #SparseMatrix> to memref<?xf64>, memref<?xi1>, memref<?xindex>, index
+ : tensor<4x8xf64, #SparseMatrix> to memref<?xf64>, memref<?xi1>, memref<?xindex>, index
return %added : memref<?xindex>
}
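The change from tensor<8x8xf64> to tensor<4x8xf64> is what gives this test its teeth: with a square shape, querying the wrong dimension still returns the right size, so only distinct extents let FileCheck distinguish the fixed behavior from the buggy one.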

129 changes: 122 additions & 7 deletions mlir/test/Dialect/SparseTensor/sparse_expand.mlir
@@ -1,8 +1,21 @@
- // RUN: mlir-opt %s -sparsification | \
+ // RUN: mlir-opt %s --linalg-generalize-named-ops \
+ // RUN: --linalg-fuse-elementwise-ops \
+ // RUN: --sparsification | \
// RUN: FileCheck %s --check-prefix=CHECK-SPARSE
- // RUN: mlir-opt %s -sparsification -sparse-tensor-conversion | \
+ // RUN: mlir-opt %s --linalg-generalize-named-ops \
+ // RUN: --linalg-fuse-elementwise-ops \
+ // RUN: --sparsification --sparse-tensor-conversion --cse | \
// RUN: FileCheck %s --check-prefix=CHECK-CONVERT

+ #CSR = #sparse_tensor.encoding<{
+   dimLevelType = [ "dense", "compressed" ]
+ }>

+ #CSC = #sparse_tensor.encoding<{
+   dimLevelType = [ "dense", "compressed" ],
+   dimOrdering = affine_map<(i,j) -> (j,i)>
+ }>

#DCSC = #sparse_tensor.encoding<{
dimLevelType = [ "compressed", "compressed" ],
dimOrdering = affine_map<(i,j) -> (j,i)>
@@ -24,22 +37,28 @@
//
// CHECK-SPARSE-LABEL: func @kernel(
// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
- // CHECK-SPARSE: scf.for
- // CHECK-SPARSE: scf.for
+ // CHECK-SPARSE: scf.for {{.*}} {
+ // CHECK-SPARSE: scf.for {{.*}} {
+ // CHECK-SPARSE: }
+ // CHECK-SPARSE: }
// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
// CHECK-SPARSE: return %[[RET]]
//
// CHECK-CONVERT-LABEL: func @kernel(
+ // CHECK-CONVERT: %[[C:.*]] = arith.constant 0 : index
// CHECK-CONVERT: %{{.*}} = call @sparseDimSize
- // CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize
+ // CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
+ // CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C]])
// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
- // CHECK-CONVERT: scf.for
- // CHECK-CONVERT: scf.for
+ // CHECK-CONVERT: scf.for {{.*}} {
+ // CHECK-CONVERT: scf.for {{.*}} {
+ // CHECK-CONVERT: }
+ // CHECK-CONVERT: }
// CHECK-CONVERT: call @expInsertF64
// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
@@ -59,3 +78,99 @@ func.func @kernel(%arga: tensor<?x?xf64, #DCSC>) -> tensor<?xf64, #SV> {
} -> tensor<?xf64, #SV>
return %0 : tensor<?xf64, #SV>
}

//
// CHECK-SPARSE-LABEL: func @matmul1(
// CHECK-SPARSE-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-SPARSE-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-SPARSE-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-SPARSE: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {
// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
// CHECK-SPARSE: scf.for {{.*}} {
// CHECK-SPARSE: scf.for {{.*}} {
// CHECK-SPARSE: }
// CHECK-SPARSE: }
// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
// CHECK-SPARSE: }
// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
// CHECK-SPARSE: return %[[RET]]
//
// CHECK-CONVERT-LABEL: func @matmul1(
// CHECK-CONVERT-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-CONVERT-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-CONVERT-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C1]])
// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
// CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {
// CHECK-CONVERT: scf.for {{.*}} {
// CHECK-CONVERT: scf.for {{.*}} {
// CHECK-CONVERT: }
// CHECK-CONVERT: }
// CHECK-CONVERT: call @expInsertF64
// CHECK-CONVERT: }
// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
// CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
// CHECK-CONVERT: call @endInsert
//
func.func @matmul1(%A: tensor<8x2xf64, #CSR>,
%B: tensor<2x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> {
%C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR>
%D = linalg.matmul
ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>)
outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR>
return %D: tensor<8x4xf64, #CSR>
}

//
// CHECK-SPARSE-LABEL: func @matmul2(
// CHECK-SPARSE-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-SPARSE-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-SPARSE-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-SPARSE: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {
// CHECK-SPARSE: %[[A:.*]], %[[B:.*]], %[[C:.*]], %{{.*}} = sparse_tensor.expand
// CHECK-SPARSE: scf.for {{.*}} {
// CHECK-SPARSE: scf.for {{.*}} {
// CHECK-SPARSE: }
// CHECK-SPARSE: }
// CHECK-SPARSE: sparse_tensor.compress %{{.*}}, %{{.*}}, %[[A]], %[[B]], %[[C]]
// CHECK-SPARSE: }
// CHECK-SPARSE: %[[RET:.*]] = sparse_tensor.load %{{.*}} hasInserts
// CHECK-SPARSE: return %[[RET]]
//
// CHECK-CONVERT-LABEL: func @matmul2(
// CHECK-CONVERT-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-CONVERT-DAG: %[[C1:.*]] = arith.constant 1 : index
// CHECK-CONVERT-DAG: %[[C4:.*]] = arith.constant 4 : index
// CHECK-CONVERT: %[[N:.*]] = call @newSparseTensor
// CHECK-CONVERT: %[[S:.*]] = call @sparseDimSize(%[[N]], %[[C1]])
// CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref<?xf64>
// CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref<?xi1>
// CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref<?xindex>
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<?xf64>)
// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<?xi1>)
// CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {
// CHECK-CONVERT: scf.for {{.*}} {
// CHECK-CONVERT: scf.for {{.*}} {
// CHECK-CONVERT: }
// CHECK-CONVERT: }
// CHECK-CONVERT: call @expInsertF64
// CHECK-CONVERT: }
// CHECK-CONVERT: memref.dealloc %[[A]] : memref<?xf64>
// CHECK-CONVERT: memref.dealloc %[[B]] : memref<?xi1>
// CHECK-CONVERT: memref.dealloc %[[C]] : memref<?xindex>
// CHECK-CONVERT: call @endInsert
//
func.func @matmul2(%A: tensor<8x2xf64, #CSC>,
%B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> {
%C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
%D = linalg.matmul
ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
return %D: tensor<8x4xf64, #CSC>
}
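Both matmul1 (CSR output) and matmul2 (CSC output) end up checking sparseDimSize(%[[N]], %[[C1]]): the runtime index happens to coincide, but assuming the runtime indexes sizes by stored level (which is why the dim size utility re-applies the ordering), level 1 of the 8x4 CSC result holds the rows, so the workspace is sized 8 rather than 4. The new integration test below runs the CSC kernel end-to-end to validate the computed values, not just the emitted IR.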
79 changes: 79 additions & 0 deletions mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
@@ -0,0 +1,79 @@
// RUN: mlir-opt %s --sparse-compiler | \
// RUN: mlir-cpu-runner -e entry -entry-point-result=void \
// RUN: -shared-libs=%mlir_lib_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

#CSC = #sparse_tensor.encoding<{
dimLevelType = [ "dense", "compressed" ],
dimOrdering = affine_map<(i,j) -> (j,i)>
}>

module {
//
// Column-wise storage forces the ijk loop to permute into jki
// so that access pattern expansion (workspace) needs to be
// done along the dimension of size 8.
//
func.func @matmul(%A: tensor<8x2xf64, #CSC>,
%B: tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> {
%C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
%D = linalg.matmul
ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
return %D: tensor<8x4xf64, #CSC>
}

//
// Main driver.
//
func.func @entry() {
%c0 = arith.constant 0 : index
%d1 = arith.constant -1.0 : f64

// Initialize various dense matrices for stress testing.
%da = arith.constant dense<[
[ 1.1, 2.1 ],
[ 1.2, 2.2 ],
[ 1.3, 2.3 ],
[ 1.4, 2.4 ],
[ 1.5, 2.5 ],
[ 1.6, 2.6 ],
[ 1.7, 2.7 ],
[ 1.8, 2.8 ]
]> : tensor<8x2xf64>
%db = arith.constant dense<[
[ 10.1, 11.1, 12.1, 13.1 ],
[ 10.2, 11.2, 12.2, 13.2 ]
]> : tensor<2x4xf64>

// Convert all these matrices to sparse format.
%x1 = sparse_tensor.convert %da : tensor<8x2xf64> to tensor<8x2xf64, #CSC>
%x2 = sparse_tensor.convert %db : tensor<2x4xf64> to tensor<2x4xf64, #CSC>

// Call kernel on the sparse inputs.
%x3 = call @matmul(%x1, %x2)
: (tensor<8x2xf64, #CSC>,
tensor<2x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>

//
// CHECK: ( ( 32.53, 35.73, 38.93, 42.13 ),
// CHECK-SAME: ( 34.56, 37.96, 41.36, 44.76 ),
// CHECK-SAME: ( 36.59, 40.19, 43.79, 47.39 ),
// CHECK-SAME: ( 38.62, 42.42, 46.22, 50.02 ),
// CHECK-SAME: ( 40.65, 44.65, 48.65, 52.65 ),
// CHECK-SAME: ( 42.68, 46.88, 51.08, 55.28 ),
// CHECK-SAME: ( 44.71, 49.11, 53.51, 57.91 ),
// CHECK-SAME: ( 46.74, 51.34, 55.94, 60.54 ) )
//
%xc = sparse_tensor.convert %x3 : tensor<8x4xf64, #CSC> to tensor<8x4xf64>
%xv = vector.transfer_read %xc[%c0, %c0], %d1 : tensor<8x4xf64>, vector<8x4xf64>
vector.print %xv : vector<8x4xf64>

// Release the resources.
bufferization.dealloc_tensor %x1 : tensor<8x2xf64, #CSC>
bufferization.dealloc_tensor %x2 : tensor<2x4xf64, #CSC>
bufferization.dealloc_tensor %x3 : tensor<8x4xf64, #CSC>

return
}
}
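As an aside, the expected CHECK values above can be cross-checked with a tiny standalone program (not part of the commit) that recomputes the dense 8x2 by 2x4 product:

```cpp
#include <cstdio>

// Recompute the dense matmul the integration test performs in sparse form,
// so the vector.print output in the CHECK lines can be verified by hand.
int main() {
  const double a[8][2] = {{1.1, 2.1}, {1.2, 2.2}, {1.3, 2.3}, {1.4, 2.4},
                          {1.5, 2.5}, {1.6, 2.6}, {1.7, 2.7}, {1.8, 2.8}};
  const double b[2][4] = {{10.1, 11.1, 12.1, 13.1},
                          {10.2, 11.2, 12.2, 13.2}};
  for (int i = 0; i < 8; ++i) {
    for (int j = 0; j < 4; ++j)
      std::printf("%6.2f ", a[i][0] * b[0][j] + a[i][1] * b[1][j]);
    std::printf("\n"); // e.g., row 0 prints 32.53 35.73 38.93 42.13
  }
}
```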
