[mlir][sparse][simd] only accept proper unit stride subscripts
Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D139983
aartbik committed Dec 14, 2022
1 parent 6fbcb3f commit 70ac598
Showing 2 changed files with 56 additions and 3 deletions.
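
For orientation before the diff: the rule the patch enforces in vectorizeSubscripts can be condensed to one check per subscript, keyed on whether the subscript occupies the last (innermost) position. The C++ sketch below is only a distillation of the diff that follows; the enum and the helper name are invented for illustration and do not exist in the repository.

// Illustrative sketch of the per-subscript acceptance rule (names invented).
enum class SubKind {
  Invariant,         // value defined outside the vectorized loop
  LoopIndex,         // the loop induction variable itself
  Gather,            // index loaded from memory (memref.load)
  InvariantPlusIndex // invariant value plus loop index (arith.addi)
};

// A subscript is accepted only when it yields a proper access pattern for
// its position: unit stride (or gather/scatter) in the innermost dimension,
// loop-invariant addressing in the outer dimensions.
bool acceptSubscript(SubKind kind, bool innermost) {
  switch (kind) {
  case SubKind::Invariant:
    return !innermost; // innermost-invariant loads are left to LICM
  case SubKind::LoopIndex:
  case SubKind::Gather:
  case SubKind::InvariantPlusIndex:
    return innermost;  // only the innermost dimension may vary per lane
  }
  return false;
}

Anything outside these combinations now makes vectorizeSubscripts return false, so the loop is simply left scalar rather than vectorized with a non-unit-stride address pattern.
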
28 changes: 25 additions & 3 deletions mlir/lib/Dialect/SparseTensor/Transforms/SparseVectorization.cpp
@@ -234,10 +234,28 @@ static Value genVectorReducInit(PatternRewriter &rewriter, Location loc,
 static bool vectorizeSubscripts(PatternRewriter &rewriter, scf::ForOp forOp,
                                 VL vl, ValueRange subs, bool codegen,
                                 Value vmask, SmallVectorImpl<Value> &idxs) {
+  unsigned d = 0;
+  unsigned dim = subs.size();
   for (auto sub : subs) {
-    // Invariant/loop indices simply pass through.
-    if (sub.dyn_cast<BlockArgument>() ||
+    bool innermost = ++d == dim;
+    // Invariant subscripts in outer dimensions simply pass through.
+    // Note that we rely on LICM to hoist loads where all subscripts
+    // are invariant in the innermost loop.
+    if (sub.getDefiningOp() &&
         sub.getDefiningOp()->getBlock() != &forOp.getRegion().front()) {
+      if (innermost)
+        return false;
       if (codegen)
         idxs.push_back(sub);
       continue; // success so far
     }
+    // Invariant block arguments (including outer loop indices) in outer
+    // dimensions simply pass through. Direct loop indices in the
+    // innermost loop simply pass through as well.
+    if (auto barg = sub.dyn_cast<BlockArgument>()) {
+      bool invariant = barg.getOwner() != &forOp.getRegion().front();
+      if (invariant == innermost)
+        return false;
+      if (codegen)
+        idxs.push_back(sub);
+      continue; // success so far
@@ -264,6 +282,8 @@ static bool vectorizeSubscripts(PatternRewriter &rewriter, scf::ForOp forOp,
     // which creates the potential of incorrect address calculations in the
     // unlikely case we need such extremely large offsets.
     if (auto load = cast.getDefiningOp<memref::LoadOp>()) {
+      if (!innermost)
+        return false;
       if (codegen) {
         SmallVector<Value> idxs2(load.getIndices()); // no need to analyze
         Location loc = forOp.getLoc();
@@ -286,9 +306,11 @@ static bool vectorizeSubscripts(PatternRewriter &rewriter, scf::ForOp forOp,
     if (auto load = cast.getDefiningOp<arith::AddIOp>()) {
       Value inv = load.getOperand(0);
       Value idx = load.getOperand(1);
-      if (!inv.dyn_cast<BlockArgument>() &&
+      if (inv.getDefiningOp() &&
           inv.getDefiningOp()->getBlock() != &forOp.getRegion().front() &&
           idx.dyn_cast<BlockArgument>()) {
+        if (!innermost)
+          return false;
         if (codegen)
           idxs.push_back(
               rewriter.create<arith::AddIOp>(forOp.getLoc(), inv, idx));
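
Expressed as ordinary loops, the subscript shapes that the updated check distinguishes look roughly like this; the fragment is illustrative only (array, bound, and variable names are invented) and simply mirrors the cases handled in the diff above.

// i: an outer loop index; j: the innermost (vectorized) loop index;
// crd: a coordinate array driving an indirect (gather-style) access.
void shapes(double a[][8], const int *crd, int base, int i, int n) {
  for (int j = 0; j < n; j++) {
    double unit = a[i][j];        // innermost index in the last dim: unit stride, accepted
    double gthr = a[i][crd[j]];   // loaded index in the last dim: gather, accepted
    double offs = a[i][base + j]; // invariant + index in the last dim: accepted
    double bad  = a[j][i];        // loop index in an outer dim: now rejected
    (void)unit; (void)gthr; (void)offs; (void)bad;
  }
}
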
31 changes: 31 additions & 0 deletions mlir/test/Dialect/SparseTensor/sparse_vector_concat.mlir
@@ -0,0 +1,31 @@
// RUN: mlir-opt %s --sparse-compiler="enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"

#MAT_D_C = #sparse_tensor.encoding<{
  dimLevelType = ["dense", "compressed"]
}>

#MAT_C_C_P = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed", "compressed" ],
  dimOrdering = affine_map<(i,j) -> (j,i)>
}>

#MAT_C_D_P = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed", "dense" ],
  dimOrdering = affine_map<(i,j) -> (j,i)>
}>

//
// Ensures only last loop is vectorized
// (vectorizing the others would crash).
//
// CHECK-LABEL: llvm.func @foo
// CHECK: llvm.intr.masked.load
// CHECK: llvm.intr.masked.scatter
//
func.func @foo(%arg0: tensor<2x4xf64, #MAT_C_C_P>,
               %arg1: tensor<3x4xf64, #MAT_C_D_P>,
               %arg2: tensor<4x4xf64, #MAT_D_C>) -> tensor<9x4xf64> {
  %0 = sparse_tensor.concatenate %arg0, %arg1, %arg2 {dimension = 0 : index}
       : tensor<2x4xf64, #MAT_C_C_P>, tensor<3x4xf64, #MAT_C_D_P>, tensor<4x4xf64, #MAT_D_C> to tensor<9x4xf64>
  return %0 : tensor<9x4xf64>
}
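
To exercise the new test by hand, the RUN line above corresponds to an invocation along these lines (lit normally substitutes the file path for %s):

mlir-opt mlir/test/Dialect/SparseTensor/sparse_vector_concat.mlir \
  --sparse-compiler="enable-runtime-library=false vl=2 reassociate-fp-reductions=true enable-index-optimizations=true"

Before this patch, the pipeline crashed while attempting to vectorize the outer loops of the concatenation; with the patch it completes, and the CHECK lines document the masked load and scatter expected in the one loop that is still vectorized.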
