[mlir][sparse] add sparse kernels test to sparse compiler test suite
This test makes sure kernels map to efficient sparse code, i.e., all
compressed for-loops and no co-iterating while loops. In addition, this
revision removes the special constant folding inside the sparse
compiler in favor of Mahesh's new generic linalg folding. Thanks!

NOTE: relies on Mahesh's fix, on which this revision needs to be rebased first

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D110001
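
For illustration only, a hypothetical sketch (not taken from this commit) of what a co-iterating while-loop looks like: when two sparse operands must be intersected, the sparsifier has to advance two cursors through the stored coordinates with an scf.while of roughly the following shape. The function name and arguments below are made up for this sketch. The kernels tested in this revision never need that form and lower to plain scf.for loops over the compressed ranges, as the CHECK lines in the new test show.

func @coiteration_shape(%ia: memref<?xindex>, %ib: memref<?xindex>,
                        %hi_a: index, %hi_b: index) -> index {
  %c0 = constant 0 : index
  %c1 = constant 1 : index
  // Keep iterating while both stored ranges still have entries.
  %res:3 = scf.while (%pa = %c0, %pb = %c0, %n = %c0)
      : (index, index, index) -> (index, index, index) {
    %inbounds_a = cmpi ult, %pa, %hi_a : index
    %inbounds_b = cmpi ult, %pb, %hi_b : index
    %cond = and %inbounds_a, %inbounds_b : i1
    scf.condition(%cond) %pa, %pb, %n : index, index, index
  } do {
  ^bb0(%pa: index, %pb: index, %n: index):
    // Advance whichever cursor points at the smaller coordinate and
    // count the positions where both coordinates coincide.
    %ca = memref.load %ia[%pa] : memref<?xindex>
    %cb = memref.load %ib[%pb] : memref<?xindex>
    %a_le = cmpi ule, %ca, %cb : index
    %b_le = cmpi ule, %cb, %ca : index
    %pa1 = addi %pa, %c1 : index
    %pb1 = addi %pb, %c1 : index
    %npa = select %a_le, %pa1, %pa : index
    %npb = select %b_le, %pb1, %pb : index
    %eq = cmpi eq, %ca, %cb : index
    %n1 = addi %n, %c1 : index
    %nn = select %eq, %n1, %n : index
    scf.yield %npa, %npb, %nn : index, index, index
  }
  return %res#2 : index
}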
aartbik committed Sep 22, 2021
1 parent 1552179 commit a924fcc
Showing 5 changed files with 160 additions and 20 deletions.
1 change: 0 additions & 1 deletion mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
@@ -230,7 +230,6 @@ class Merger {
Value v1);

private:
bool isZero(unsigned e) const;
bool maybeZero(unsigned e) const;
bool isInvariant(unsigned e) const;
Type inferType(unsigned e, Value src);
16 changes: 0 additions & 16 deletions mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
@@ -489,11 +489,6 @@ unsigned Merger::buildLattices(unsigned e, unsigned i) {
// ---+---+---+ ---+---+---+
// !x | 0 | y | !x | 0 |-y |
// x | x |x+y| x | x |x-y|
//
// TODO: remove this zero "folding" in favor of external pass into linalg
//
if (isZero(tensorExps[e].children.e1))
return buildLattices(tensorExps[e].children.e0, i);
return takeDisj(kind, // take binary disjunction
buildLattices(tensorExps[e].children.e0, i),
buildLattices(tensorExps[e].children.e1, i));
@@ -516,17 +511,6 @@ Optional<unsigned> Merger::buildTensorExpFromLinalg(linalg::GenericOp op) {
return buildTensorExp(op, yield->getOperand(0));
}

/// Only returns true if we are certain this is a zero.
bool Merger::isZero(unsigned e) const {
if (tensorExps[e].kind == kInvariant) {
if (auto c = tensorExps[e].val.getDefiningOp<ConstantIntOp>())
return c.getValue() == 0;
if (auto c = tensorExps[e].val.getDefiningOp<ConstantFloatOp>())
return c.getValue().isZero();
}
return false;
}

/// Only returns false if we are certain this is a nonzero.
bool Merger::maybeZero(unsigned e) const {
if (tensorExps[e].kind == kInvariant) {
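
The isZero() special case removed above recognized an invariant constant-zero operand of an addition or subtraction and dropped it while building the lattices. Below is a hedged sketch of the kind of kernel that used to trigger it; the trait and names are illustrative, not taken from the commit, and the expectation stated in the commit message is that the generic linalg-level folding now removes such a redundant operation before sparsification.

#SparseVector = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>

#trait = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"]
}

func @add_zero(%arga: tensor<32xf64, #SparseVector>,
               %argx: tensor<32xf64>) -> tensor<32xf64> {
  %zero = constant 0.0 : f64
  %0 = linalg.generic #trait
     ins(%arga: tensor<32xf64, #SparseVector>)
    outs(%argx: tensor<32xf64>) {
      ^bb0(%a: f64, %x: f64):
        // The removed check folded a + 0 down to just a inside the merger.
        %sum = addf %a, %zero : f64
        linalg.yield %sum : f64
  } -> tensor<32xf64>
  return %0 : tensor<32xf64>
}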
157 changes: 157 additions & 0 deletions mlir/test/Dialect/SparseTensor/sparse_kernels.mlir
@@ -0,0 +1,157 @@
// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparsification | FileCheck %s

#DCSR = #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>

// CHECK-LABEL: func @matmul(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30xf32>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<10x30xf32>) -> tensor<10x30xf32> {
// CHECK-DAG: %[[VAL_3:.*]] = constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = constant 30 : index
// CHECK: %[[VAL_6:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_3]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_3]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_0]], %[[VAL_4]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_11:.*]] = memref.buffer_cast %[[VAL_1]] : memref<20x30xf32>
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<10x30xf32>
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<10x30xf32>
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<10x30xf32> to memref<10x30xf32>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_4]] {
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = addi %[[VAL_16]], %[[VAL_4]] : index
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_20]] step %[[VAL_4]] {
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref<?xf32>
// CHECK: scf.for %[[VAL_24:.*]] = %[[VAL_3]] to %[[VAL_5]] step %[[VAL_4]] {
// CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_17]], %[[VAL_24]]] : memref<10x30xf32>
// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_22]], %[[VAL_24]]] : memref<20x30xf32>
// CHECK: %[[VAL_27:.*]] = mulf %[[VAL_23]], %[[VAL_26]] : f32
// CHECK: %[[VAL_28:.*]] = addf %[[VAL_25]], %[[VAL_27]] : f32
// CHECK: memref.store %[[VAL_28]], %[[VAL_13]]{{\[}}%[[VAL_17]], %[[VAL_24]]] : memref<10x30xf32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_29:.*]] = memref.tensor_load %[[VAL_13]] : memref<10x30xf32>
// CHECK: return %[[VAL_29]] : tensor<10x30xf32>
// CHECK: }
func @matmul(%a: tensor<10x20xf32, #DCSR>,
%b: tensor<20x30xf32>,
%c: tensor<10x30xf32>) -> tensor<10x30xf32> {
%0 = linalg.matmul
ins(%a, %b: tensor<10x20xf32, #DCSR>, tensor<20x30xf32>)
outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32>
return %0 : tensor<10x30xf32>
}

// CHECK-LABEL: func @conv2d(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xi32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<6x6xi32>) -> tensor<6x6xi32> {
// CHECK-DAG: %[[VAL_3:.*]] = constant 0 : index
// CHECK-DAG: %[[VAL_4:.*]] = constant 1 : index
// CHECK-DAG: %[[VAL_5:.*]] = constant 6 : index
// CHECK: %[[VAL_6:.*]] = memref.buffer_cast %[[VAL_0]] : memref<8x8xi32>
// CHECK: %[[VAL_7:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_3]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_3]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_9:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_12:.*]] = memref.buffer_cast %[[VAL_2]] : memref<6x6xi32>
// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<6x6xi32>
// CHECK: memref.copy %[[VAL_12]], %[[VAL_13]] : memref<6x6xi32> to memref<6x6xi32>
// CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_4]] {
// CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_16]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = addi %[[VAL_16]], %[[VAL_4]] : index
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_19]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_20]] step %[[VAL_4]] {
// CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_21]]] : memref<?xindex>
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_21]]] : memref<?xi32>
// CHECK: scf.for %[[VAL_24:.*]] = %[[VAL_3]] to %[[VAL_5]] step %[[VAL_4]] {
// CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_3]] to %[[VAL_5]] step %[[VAL_4]] {
// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_25]], %[[VAL_24]]] : memref<6x6xi32>
// CHECK: %[[VAL_27:.*]] = addi %[[VAL_25]], %[[VAL_17]] : index
// CHECK: %[[VAL_28:.*]] = addi %[[VAL_24]], %[[VAL_22]] : index
// CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_27]], %[[VAL_28]]] : memref<8x8xi32>
// CHECK: %[[VAL_30:.*]] = muli %[[VAL_29]], %[[VAL_23]] : i32
// CHECK: %[[VAL_31:.*]] = addi %[[VAL_26]], %[[VAL_30]] : i32
// CHECK: memref.store %[[VAL_31]], %[[VAL_13]]{{\[}}%[[VAL_25]], %[[VAL_24]]] : memref<6x6xi32>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_32:.*]] = memref.tensor_load %[[VAL_13]] : memref<6x6xi32>
// CHECK: return %[[VAL_32]] : tensor<6x6xi32>
// CHECK: }
func @conv2d(%input: tensor<8x8xi32>,
%filter: tensor<3x3xi32, #DCSR>,
%output: tensor<6x6xi32>) -> tensor<6x6xi32> {
%0 = linalg.conv_2d
ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>)
outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32>
return %0 : tensor<6x6xi32>
}

// CHECK-LABEL: func @quantized_matmul(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<5x3xi8>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>,
// CHECK-SAME: %[[VAL_2:.*]]: tensor<5x6xi64>) -> tensor<5x6xi64> {
// CHECK-DAG: %[[VAL_3:.*]] = constant 2 : i64
// CHECK-DAG: %[[VAL_4:.*]] = constant 0 : index
// CHECK-DAG: %[[VAL_5:.*]] = constant 1 : index
// CHECK-DAG: %[[VAL_6:.*]] = constant 5 : index
// CHECK: %[[VAL_7:.*]] = memref.buffer_cast %[[VAL_0]] : memref<5x3xi8>
// CHECK: %[[VAL_8:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_4]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_4]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_10:.*]] = sparse_tensor.pointers %[[VAL_1]], %[[VAL_5]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_11:.*]] = sparse_tensor.indices %[[VAL_1]], %[[VAL_5]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>
// CHECK: %[[VAL_13:.*]] = memref.buffer_cast %[[VAL_2]] : memref<5x6xi64>
// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<5x6xi64>
// CHECK: memref.copy %[[VAL_13]], %[[VAL_14]] : memref<5x6xi64> to memref<5x6xi64>
// CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_5]] {
// CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]] = addi %[[VAL_17]], %[[VAL_5]] : index
// CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_20]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_21]] step %[[VAL_5]] {
// CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_22]]] : memref<?xi8>
// CHECK: scf.for %[[VAL_25:.*]] = %[[VAL_4]] to %[[VAL_6]] step %[[VAL_5]] {
// CHECK: %[[VAL_26:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_25]], %[[VAL_23]]] : memref<5x6xi64>
// CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_25]], %[[VAL_18]]] : memref<5x3xi8>
// CHECK: %[[VAL_28:.*]] = sexti %[[VAL_27]] : i8 to i64
// CHECK: %[[VAL_29:.*]] = subi %[[VAL_28]], %[[VAL_3]] : i64
// CHECK: %[[VAL_30:.*]] = sexti %[[VAL_24]] : i8 to i64
// CHECK: %[[VAL_31:.*]] = muli %[[VAL_29]], %[[VAL_30]] : i64
// CHECK: %[[VAL_32:.*]] = addi %[[VAL_26]], %[[VAL_31]] : i64
// CHECK: memref.store %[[VAL_32]], %[[VAL_14]]{{\[}}%[[VAL_25]], %[[VAL_23]]] : memref<5x6xi64>
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_33:.*]] = memref.tensor_load %[[VAL_14]] : memref<5x6xi64>
// CHECK: return %[[VAL_33]] : tensor<5x6xi64>
// CHECK: }
func @quantized_matmul(%input1: tensor<5x3xi8>,
%input2: tensor<3x6xi8, #DCSR>,
%output: tensor<5x6xi64>) -> tensor<5x6xi64> {
%c0 = constant 0 : i32
%c2 = constant 2 : i32
%0 = linalg.quantized_matmul
ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32)
outs(%output : tensor<5x6xi64>) -> tensor<5x6xi64>
return %0: tensor<5x6xi64>
}
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparsification --sparse-tensor-conversion \
// RUN: --convert-vector-to-scf --convert-scf-to-std \
// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
@@ -14,7 +14,7 @@
// Do the same run, but now with SIMDization as well. This should not change the outcome.
//
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparsification="vectorization-strategy=2 vl=2" --sparse-tensor-conversion \
// RUN: --convert-vector-to-scf --convert-scf-to-std \
// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
@@ -1,5 +1,5 @@
// RUN: mlir-opt %s \
// RUN: --linalg-generalize-named-ops \
// RUN: --linalg-generalize-named-ops --linalg-fuse-elementwise-ops \
// RUN: --sparsification --sparse-tensor-conversion \
// RUN: --convert-vector-to-scf --convert-scf-to-std \
// RUN: --func-bufferize --tensor-constant-bufferize --tensor-bufferize \
