[mlir][sparse] implement direct IR alloc/empty/new for non-permutations (#72585)

This change implements the correct *level*-size setup for the direct
IR codegen fields in the sparse storage scheme. This brings libgen and
codegen together again.

This is step 3 out of 3 to make sparse_tensor.new work for BSR.
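
For intuition, a small worked example (illustrative only; the 2x2 block
shape and the sizes below are hypothetical, not taken from this commit):
a 6x8 matrix stored as 2x2 BSR maps dims (i, j) to levels (i floordiv 2,
j floordiv 2, i mod 2, j mod 2), so the direct IR fields must be sized by
the *level* sizes (3, 4, 2, 2) rather than the dimension sizes (6, 8):

    // Hypothetical sketch: computing level sizes for a 2x2 BSR layout.
    #include <array>
    #include <cstdint>

    int main() {
      std::array<std::int64_t, 2> dimSizes = {6, 8};
      // Levels: (i floordiv 2, j floordiv 2, i mod 2, j mod 2).
      std::array<std::int64_t, 4> lvlSizes = {dimSizes[0] / 2,
                                              dimSizes[1] / 2, 2, 2};
      // lvlSizes == {3, 4, 2, 2}; these, not {6, 8}, size the storage.
      return 0;
    }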
aartbik committed Nov 17, 2023
1 parent d97981c commit 83cf0dc
Showing 6 changed files with 128 additions and 121 deletions.
2 changes: 1 addition & 1 deletion mlir/include/mlir/ExecutionEngine/SparseTensor/File.h
@@ -1,4 +1,4 @@
-//===- File.h - Reading sparse tensors from files --------------*- C++ -*-===//
+//===- File.h - Reading sparse tensors from files ---------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
135 changes: 70 additions & 65 deletions mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp
@@ -159,41 +159,46 @@ static Value createAllocation(OpBuilder &builder, Location loc,
  return buffer;
}

+/// Creates the dim sizes array, filling in from dynamic sizes.
+static void createDimSizes(OpBuilder &builder, Location loc,
+                           SparseTensorType stt, ValueRange dynSizes,
+                           /*out*/ SmallVectorImpl<Value> &dimSizesValues) {
+  const Dimension dimRank = stt.getDimRank();
+  dimSizesValues.clear();
+  dimSizesValues.reserve(dimRank);
+  unsigned i = 0;
+  for (const Size sz : stt.getDimShape())
+    dimSizesValues.push_back(ShapedType::isDynamic(sz)
+                                 ? dynSizes[i++]
+                                 : constantIndex(builder, loc, sz));
+}
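// (Illustration only, not part of the commit: for a tensor<?x8x?xf64> with
// dynamic sizes (10, 20), this fills dimSizesValues with the index
// constants (10, 8, 20); static extents come from the shape, dynamic ones
// are consumed from dynSizes in order.)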

/// Creates allocation for each field in sparse tensor type. Note that
/// for all dynamic memrefs in the sparse tensor storage layout, the
/// memory size is really the capacity of the "vector", while the actual
/// size resides in the sizes array.
static void createAllocFields(OpBuilder &builder, Location loc,
-                              SparseTensorType stt, ValueRange dynSizes,
-                              bool enableInit, SmallVectorImpl<Value> &fields,
-                              Value sizeHint) {
-  // Build original sizes.
-  assert((dynSizes.size() == static_cast<size_t>(stt.getNumDynamicDims())) &&
-         "Got wrong number of dynamic sizes");
-  const Dimension dimRank = stt.getDimRank();
-  SmallVector<Value> dimSizes;
-  dimSizes.reserve(dimRank);
-  unsigned i = 0; // cumulative index into `dynSizes`.
-  for (const Size sh : stt.getDimShape())
-    dimSizes.push_back(ShapedType::isDynamic(sh)
-                           ? dynSizes[i++]
-                           : constantIndex(builder, loc, sh));
-
+                              SparseTensorType stt, bool enableInit,
+                              Value sizeHint,
+                              SmallVectorImpl<Value> &lvlSizesValues,
+                              /*out*/ SmallVectorImpl<Value> &fields) {
+  Level lvlRank = stt.getLvlRank();
  // Set up some heuristic sizes. We try to set the initial
  // size based on available information. Otherwise we just
  // initialize a few elements to start the reallocation chain.
  // TODO: refine this
  Value posHeuristic, crdHeuristic, valHeuristic;
  if (stt.isAllDense()) {
-    valHeuristic = dimSizes[0];
-    for (const Value sz : ArrayRef<Value>{dimSizes}.drop_front())
-      valHeuristic = builder.create<arith::MulIOp>(loc, valHeuristic, sz);
+    valHeuristic = lvlSizesValues[0];
+    for (Level lvl = 1; lvl < lvlRank; lvl++)
+      valHeuristic =
+          builder.create<arith::MulIOp>(loc, valHeuristic, lvlSizesValues[lvl]);
  } else if (sizeHint) {
    if (getCOOStart(stt.getEncoding()) == 0) {
      posHeuristic = constantIndex(builder, loc, 2);
      crdHeuristic = builder.create<arith::MulIOp>(
-          loc, constantIndex(builder, loc, dimRank), sizeHint); // AOS
-    } else if (dimRank == 2 && stt.isDenseLvl(0) && stt.isCompressedLvl(1)) {
+          loc, constantIndex(builder, loc, lvlRank), sizeHint); // AOS
+    } else if (lvlRank == 2 && stt.isDenseLvl(0) && stt.isCompressedLvl(1)) {
      posHeuristic = builder.create<arith::AddIOp>(
          loc, sizeHint, constantIndex(builder, loc, 1));
      crdHeuristic = sizeHint;
@@ -205,7 +210,6 @@ static void createAllocFields(OpBuilder &builder, Location loc,
    posHeuristic = crdHeuristic = valHeuristic =
        constantIndex(builder, loc, 16);
  }
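  // (Illustration only: with a size hint of nse entries, a rank-2 COO
  // tensor gets posHeuristic = 2 and crdHeuristic = 2 * nse for its AOS
  // coordinate buffer, while a dense-then-compressed (CSR-like) tensor
  // gets posHeuristic = nse + 1 and crdHeuristic = nse.)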

  // Initializes all fields. An initial storage specifier and allocated
  // positions/coordinates/values memrefs (with heuristic capacity).
  foreachFieldAndTypeInSparseTensor(
@@ -237,16 +241,13 @@ static void createAllocFields(OpBuilder &builder, Location loc,
        // Returns true to continue the iteration.
        return true;
      });
-
  // Initialize the storage scheme to an empty tensor. Sets the lvlSizes
  // and gives all position fields an initial zero entry, so that it is
  // easier to maintain the "linear + 1" length property.
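  // (Illustration only: seeding each position memref with a single zero
  // mirrors the classic CSR convention that a row-pointer array over n
  // rows holds n + 1 entries, starting from pos[0] = 0.)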
  MutSparseTensorDescriptor desc(stt, fields);
  Value posZero = constantZero(builder, loc, stt.getPosType());
  for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) {
-    // FIXME: `toOrigDim` is deprecated.
-    desc.setLvlSize(builder, loc, lvl,
-                    dimSizes[toOrigDim(stt.getEncoding(), lvl)]);
+    desc.setLvlSize(builder, loc, lvl, lvlSizesValues[lvl]);
    const auto dlt = stt.getLvlType(lvl);
    if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt))
      createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, lvl,
@@ -371,19 +372,19 @@ static void genEndInsert(OpBuilder &builder, Location loc,
                         SparseTensorDescriptor desc) {
  const SparseTensorType stt(desc.getRankedTensorType());
  const Level lvlRank = stt.getLvlRank();
-  for (Level l = 0; l < lvlRank; l++) {
-    const auto dlt = stt.getLvlType(l);
+  for (Level lvl = 0; lvl < lvlRank; lvl++) {
+    const auto dlt = stt.getLvlType(lvl);
    if (isCompressedDLT(dlt)) {
      // Compressed dimensions need a position cleanup for all entries
      // that were not visited during the insertion pass.
      //
      // TODO: avoid cleanup and keep compressed scheme consistent at all
      // times?
      //
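      // (Illustration only: entries that received no insertions leave
      // their position slots unwritten; the cleanup below patches them so
      // the position array stays monotonically non-decreasing.)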
-      if (l > 0) {
+      if (lvl > 0) {
        Type posType = stt.getPosType();
-        Value posMemRef = desc.getPosMemRef(l);
-        Value hi = desc.getPosMemSize(builder, loc, l);
+        Value posMemRef = desc.getPosMemRef(lvl);
+        Value hi = desc.getPosMemSize(builder, loc, lvl);
        Value zero = constantIndex(builder, loc, 0);
        Value one = constantIndex(builder, loc, 1);
        // Vector of only one, but needed by createFor's prototype.
@@ -729,7 +730,6 @@ class SparseReMapConverter : public OpConversionPattern<ReinterpretMapOp> {
};

/// Sparse codegen rule for the alloc operator.
-/// TODO(springerm): remove when bufferization.alloc_tensor is gone
class SparseTensorAllocConverter
    : public OpConversionPattern<bufferization::AllocTensorOp> {
public:
@@ -746,6 +746,8 @@ class SparseTensorAllocConverter
    if (!resType.hasEncoding())
      return failure();
    Location loc = op.getLoc();
+
+    // Deal with copy.
    if (op.getCopy()) {
      auto desc = getDescriptorFromTensorTuple(adaptor.getCopy());
      SmallVector<Value> fields;
@@ -766,17 +768,22 @@ class SparseTensorAllocConverter
      return success();
    }

+    // Construct the dim/lvl sizes and the (unused) dim2lvl/lvl2dim buffers.
+    SmallVector<Value> dimSizesValues;
+    SmallVector<Value> lvlSizesValues;
+    Value dimSizesBuffer;
+    Value dim2lvlBuffer;
+    Value lvl2dimBuffer;
+    createDimSizes(rewriter, loc, resType, adaptor.getDynamicSizes(),
+                   dimSizesValues);
+    genMapBuffers(rewriter, loc, resType, dimSizesValues, dimSizesBuffer,
+                  lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer);
+
    // Construct allocation for each field.
    Value sizeHint = op.getSizeHint();
-    ValueRange dynSizes = adaptor.getDynamicSizes();
-    const size_t found = dynSizes.size();
-    const int64_t expected = resType.getNumDynamicDims();
-    if (found != static_cast<size_t>(expected))
-      return rewriter.notifyMatchFailure(op,
-                                         "Got wrong number of dynamic sizes");
    SmallVector<Value> fields;
-    createAllocFields(rewriter, loc, resType, dynSizes,
-                      enableBufferInitialization, fields, sizeHint);
+    createAllocFields(rewriter, loc, resType, enableBufferInitialization,
+                      sizeHint, lvlSizesValues, fields);

    // Replace operation with resulting memrefs.
    rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
@@ -788,7 +795,6 @@ class SparseTensorAllocConverter
};

/// Sparse codegen rule for the empty tensor operator.
-/// TODO(springerm): remove when bufferization.alloc_tensor is gone
class SparseTensorEmptyConverter : public OpConversionPattern<tensor::EmptyOp> {
public:
  using OpConversionPattern::OpConversionPattern;
@@ -803,19 +809,24 @@ class SparseTensorEmptyConverter : public OpConversionPattern<tensor::EmptyOp> {
    const auto resType = getSparseTensorType(op);
    if (!resType.hasEncoding())
      return failure();
+    Location loc = op.getLoc();

+    // Construct the dim/lvl sizes and the (unused) dim2lvl/lvl2dim buffers.
+    SmallVector<Value> dimSizesValues;
+    SmallVector<Value> lvlSizesValues;
+    Value dimSizesBuffer;
+    Value dim2lvlBuffer;
+    Value lvl2dimBuffer;
+    createDimSizes(rewriter, loc, resType, adaptor.getDynamicSizes(),
+                   dimSizesValues);
+    genMapBuffers(rewriter, loc, resType, dimSizesValues, dimSizesBuffer,
+                  lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer);
+
    // Construct allocation for each field.
-    Location loc = op.getLoc();
    Value sizeHint; // none
-    const ValueRange dynSizes = adaptor.getDynamicSizes();
-    const size_t found = dynSizes.size();
-    const int64_t expected = resType.getNumDynamicDims();
-    if (found != static_cast<size_t>(expected))
-      return rewriter.notifyMatchFailure(op,
-                                         "Got wrong number of dynamic sizes");
    SmallVector<Value> fields;
-    createAllocFields(rewriter, loc, resType, dynSizes,
-                      enableBufferInitialization, fields, sizeHint);
+    createAllocFields(rewriter, loc, resType, enableBufferInitialization,
+                      sizeHint, lvlSizesValues, fields);

    // Replace operation with resulting memrefs.
    rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields));
@@ -1461,36 +1472,30 @@ struct SparseNewConverter : public OpConversionPattern<NewOp> {
    //   if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values)
    //   update storage specifier
    //   @delSparseTensorReader(%reader)
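    // (Note: the quoted steps are emitted as calls into the sparse tensor
    // runtime library; %reader is the opaque handle produced by the
    // genReader call below.)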
-    SmallVector<Value> dimShapesValues;
+    SmallVector<Value> dimSizesValues;
    Value dimSizesBuffer;
    Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0],
-                             dimShapesValues, dimSizesBuffer);
+                             dimSizesValues, dimSizesBuffer);

    // Get the number of stored entries.
    const Type indexTp = rewriter.getIndexType();
    Value nse = createFuncCall(rewriter, loc, "getSparseTensorReaderNSE",
                               {indexTp}, {reader}, EmitCInterface::Off)
                    .getResult(0);
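    // (Note: nse, the number of stored entries reported by the reader,
    // also serves as the size hint for createAllocFields below.)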

-    // Construct allocation for each field.
-    SmallVector<Value> dynSizes;
-    if (dstTp.hasDynamicDimShape()) {
-      for (const auto &d : llvm::enumerate(dstTp.getDimShape()))
-        if (ShapedType::isDynamic(d.value()))
-          dynSizes.push_back(rewriter.create<memref::LoadOp>(
-              loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index())));
-    }
-    SmallVector<Value> fields;
-    createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false,
-                      fields, nse);
-
-    // Now construct the lvl sizes and the dim2lvl/lvl2dim buffers.
+    // Construct the lvl sizes and the dim2lvl/lvl2dim buffers.
    SmallVector<Value> lvlSizesValues;
    Value dim2lvlBuffer;
    Value lvl2dimBuffer;
-    genMapBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer,
+    genMapBuffers(rewriter, loc, dstTp, dimSizesValues, dimSizesBuffer,
                  lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer);

+    // Construct allocation for each field.
+    Value sizeHint = nse;
+    SmallVector<Value> fields;
+    createAllocFields(rewriter, loc, dstTp, /*enableInit=*/false, sizeHint,
+                      lvlSizesValues, fields);
+
    // Read the COO tensor data.
    MutSparseTensorDescriptor desc(dstTp, fields);
    Value xs = desc.getAOSMemRef();
