[mlir][tensor][bufferize] Support memory_space for tensor.pad
This change adds memory space support to tensor.pad. (tensor.generate and tensor.from_elements do not support memory spaces yet.)

The memory space is inferred from the buffer of the source tensor.

Instead of lowering tensor.pad to tensor.generate + tensor.insert_slice, it is now lowered to bufferization.alloc_tensor (with the correct memory space) + linalg.map + tensor.insert_slice.

Memory space support for the remaining two tensor ops is left for a later point, as this requires some more design discussions.
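
For illustration, a rough before/after sketch of the new lowering, modeled on the test case added below in one-shot-bufferize.mlir (%t, %h, %f, %sz and %src are placeholders, and the bufferized form is simplified by hand, not literal pass output):

  // Before bufferization: pad a tensor whose buffer lives in memory space 3.
  %0 = bufferization.alloc_tensor() copy(%t) {memory_space = 3 : ui64} : tensor<?xf32>
  %1 = tensor.pad %0 low[2] high[%h] {
  ^bb0(%i: index):
    tensor.yield %f : f32
  } : tensor<?xf32> to tensor<15xf32>

  // After bufferization (simplified): the buffer for the padded result
  // inherits memory space 3 from the source buffer instead of defaulting
  // to space 0.
  %alloc = memref.alloc() : memref<15xf32, 3>
  linalg.map outs(%alloc : memref<15xf32, 3>)
    () {
      linalg.yield %f : f32  // fill the entire buffer with the padding value
    }
  %view = memref.subview %alloc[2] [%sz] [1]
      : memref<15xf32, 3> to memref<?xf32, strided<[1], offset: 2>, 3>
  memref.copy %src, %view  // copy the source buffer into the unpadded interior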

Differential Revision: https://reviews.llvm.org/D136265
matthias-springer committed Oct 27, 2022
1 parent e26f287 commit 09dfb44
Showing 3 changed files with 66 additions and 17 deletions.
47 changes: 34 additions & 13 deletions mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -779,7 +779,8 @@ struct InsertSliceOpInterface
   }
 };
 
-/// Bufferization of tensor.pad. Replace with tensor.generate + insert_slice.
+/// Bufferization of tensor.pad. Replace with bufferization.alloc_tensor +
+/// linalg.map + insert_slice.
 /// For best performance, vectorize before bufferization (better performance in
 /// case of padding with a constant).
 struct PadOpInterface
@@ -804,6 +805,21 @@ struct PadOpInterface
     return {};
   }
 
+  FailureOr<BaseMemRefType>
+  getBufferType(Operation *op, Value value, const BufferizationOptions &options,
+                const DenseMap<Value, BaseMemRefType> &fixedTypes) const {
+    // Infer memory space from the source tensor.
+    auto padOp = cast<tensor::PadOp>(op);
+    auto maybeSrcBufferType =
+        bufferization::getBufferType(padOp.getSource(), options, fixedTypes);
+    if (failed(maybeSrcBufferType))
+      return failure();
+    MemRefLayoutAttrInterface layout;
+    return MemRefType::get(padOp.getResultType().getShape(),
+                           padOp.getResultType().getElementType(), layout,
+                           maybeSrcBufferType->getMemorySpace());
+  }
+
   LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
                           const BufferizationOptions &options) const {
     auto padOp = cast<tensor::PadOp>(op);
@@ -837,25 +853,30 @@ struct PadOpInterface
       dynamicSizes.push_back(sum);
     }
 
-    // Create tensor::GenerateOp.
-    auto generateOp =
-        rewriter.create<tensor::GenerateOp>(loc, resultType, dynamicSizes);
-    // Move over "escape" attribute if present.
-    if (padOp->hasAttr(BufferizationDialect::kEscapeAttrName))
-      generateOp->setAttr(
-          BufferizationDialect::kEscapeAttrName,
-          padOp->getAttr(BufferizationDialect::kEscapeAttrName));
-    // TODO: Memory space
-    rewriter.inlineRegionBefore(padOp.getRegion(), generateOp.getBody(),
-                                generateOp.getBody().begin());
+    // Should the buffer be deallocated?
+    bool dealloc =
+        shouldDeallocateOpResult(padOp.getResult().cast<OpResult>(), options);
+    // Allocate a buffer for the padded result.
+    FailureOr<Value> tensorAlloc =
+        allocateTensorForShapedValue(rewriter, loc, padOp.getResult(),
+                                     /*escape=*/!dealloc, options,
+                                     /*copy=*/false);
+    if (failed(tensorAlloc))
+      return failure();
+
+    // tensor::PadOp is like tensor::GenerateOp: The only difference is that
+    // only a part of the generated tensor is needed. For simplicity, we reuse
+    // the same functionality here.
+    Value filledBuffer = lowerGenerateLikeOpBody(
+        rewriter, loc, *tensorAlloc, dynamicSizes, padOp.getBodyRegion());
 
     // Create tensor::InsertSliceOp.
     SmallVector<OpFoldResult> sliceSizes =
        getMixedSizes(rewriter, loc, padOp.getSource());
     SmallVector<OpFoldResult> sliceStrides(srcType.getRank(),
                                            rewriter.getIndexAttr(1));
     rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
-        padOp, padOp.getSource(), generateOp.getResult(),
+        padOp, padOp.getSource(), filledBuffer,
         /*offsets=*/padOp.getMixedLowPad(), sliceSizes, sliceStrides);
 
     return success();
8 changes: 4 additions & 4 deletions mlir/test/Dialect/Tensor/bufferize.mlir
@@ -539,19 +539,19 @@ func.func @tensor.reshape(%t1: tensor<?x10xf32>) -> tensor<2x2x5xf32> {
 
 // -----
 
-// CHECK: #[[$sum_map:.+]] = affine_map<()[s0, s1, s2] -> (s0 + s1 + s2)>
+// CHECK: #[[$sum_map_1:.+]] = affine_map<()[s0, s1] -> (s1 + s0 + 5)>
+// CHECK: #[[$sum_map_2:.+]] = affine_map<()[s0, s1] -> (s0 + s1 + 10)>
 // CHECK-LABEL: func @tensor.pad(
 //  CHECK-SAME:   %[[t1:.*]]: tensor<?x10xindex>, %[[l2:.*]]: index, %[[h1:.*]]: index, %[[h2:.*]]: index
 func.func @tensor.pad(%t1: tensor<?x10xindex>, %l2: index, %h1: index,
                       %h2: index) -> tensor<?x?xindex> {
   // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<?x10xindex>
   // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
   // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
-  // CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index
   // CHECK-DAG: %[[dim0:.*]] = memref.dim %[[m1]], %[[c0]]
   // CHECK-DAG: %[[dim1:.*]] = memref.dim %[[m1]], %[[c1]]
-  // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$sum_map]]()[%[[dim0]], %[[c5]], %[[h1]]]
-  // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$sum_map]]()[%[[dim1]], %[[l2]], %[[h2]]]
+  // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$sum_map_1]]()[%[[h1]], %[[dim0]]]
+  // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$sum_map_2]]()[%[[l2]], %[[h2]]]
   // CHECK: %[[alloc:.*]] = memref.alloc(%[[size0]], %[[size1]]) {{.*}} : memref<?x?xindex>
   // CHECK: %[[alloc_t:.*]] = bufferization.to_tensor %[[alloc]]
   // CHECK: %[[mapped:.*]] = linalg.map
28 changes: 28 additions & 0 deletions mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -251,3 +251,31 @@ func.func @insert_equivalent_tensor(%t: tensor<10xf32>) -> tensor<10xf32> {
   %1 = tensor.insert_slice %0 into %t[0][10][1] : tensor<10xf32> into tensor<10xf32>
   return %1 : tensor<10xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func @pad_memory_space(
+//  CHECK-SAME:   %[[t:.*]]: memref<?xf32, strided<[?], offset: ?>>
+func.func @pad_memory_space(%t: tensor<?xf32>, %h1: index, %f: f32, %pos: index) -> f32
+{
+  // CHECK: %[[alloc_tensor:.*]] = memref.alloc{{.*}} : memref<?xf32, 3>
+  // CHECK: memref.copy %[[t]], %[[alloc_tensor]]
+  %0 = bufferization.alloc_tensor() copy(%t)
+      {memory_space = 3 : ui64} : tensor<?xf32>
+  // CHECK: %[[padded_alloc:.*]] = memref.alloc() {{.*}} : memref<15xf32, 3>
+  // CHECK: linalg.map
+  // CHECK:   outs(%[[padded_alloc]] : memref<15xf32, 3>)
+  // CHECK:   linalg.yield %{{.*}}
+  // CHECK: }
+  // CHECK: %[[subview:.*]] = memref.subview {{.*}} : memref<15xf32, 3> to memref<?xf32, strided<[1], offset: 2>, 3>
+  // CHECK: memref.copy %[[alloc_tensor]], %[[subview]]
+  %1 = tensor.pad %0 low[2] high[%h1] {
+  ^bb0(%arg0: index):
+    tensor.yield %f : f32
+  } : tensor<?xf32> to tensor<15xf32>
+  // CHECK: memref.load {{.*}} : memref<15xf32, 3>
+  %2 = tensor.extract %1[%pos] : tensor<15xf32>
+  // CHECK-DAG: memref.dealloc %[[alloc_tensor]]
+  // CHECK-DAG: memref.dealloc %[[padded_alloc]]
+  return %2 : f32
+}
