124 changes: 124 additions & 0 deletions mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
@@ -0,0 +1,124 @@
//===- TensorConstantBufferize.cpp - Bufferization of constants ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements bufferization of tensor-valued std.constant ops.
//
//===----------------------------------------------------------------------===//

#include "PassDetail.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/Transforms/Bufferize.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

namespace {
// This class creates global ops for all tensor-valued constants in the program.
// It creates them with pretty names and makes sure that duplicate globals
// aren't created.
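// For example (illustrative, based on the tests added below), bufferizing
//   %0 = constant dense<7.0> : tensor<3x4xf32>
// produces a module-level global named @__constant_3x4xf32; a second
// constant with the same value reuses that global rather than minting a
// new one.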
class GlobalCreator {
public:
explicit GlobalCreator(ModuleOp module);
GlobalMemrefOp getGlobalFor(Attribute attr) {
assert(globals.find(attr) != globals.end() && "unknown constant attr");
return globals[attr];
}

private:
DenseMap<Attribute, GlobalMemrefOp> globals;
};

GlobalCreator::GlobalCreator(ModuleOp module) {
BufferizeTypeConverter typeConverter;
// Create a builder without an insertion point. We will insert using the
// symbol table to guarantee unique names.
OpBuilder globalBuilder(module.getContext());
SymbolTable symbolTable(module);
module.walk([&](ConstantOp op) {
// We only want tensor constants for now.
auto type = op.getType().dyn_cast<RankedTensorType>();
if (!type)
return;
// If we already have a global for this constant value, no need to do
// anything else.
auto it = globals.find(op.getValue());
if (it != globals.end())
return;

// Create a pretty name.
SmallString<64> buf;
llvm::raw_svector_ostream os(buf);
interleave(type.getShape(), os, "x");
os << "x" << type.getElementType();

auto global = globalBuilder.create<GlobalMemrefOp>(
op.getLoc(), (Twine("__constant_") + os.str()).str(),
/*sym_visibility=*/globalBuilder.getStringAttr("private"),
/*type=*/
TypeAttr::get(typeConverter.convertType(type)), /*initial_value=*/
op.getValue().cast<ElementsAttr>(), /*constant=*/true);
symbolTable.insert(global);
// The symbol table inserts at the end of the module, but globals are a bit
// nicer if they are at the beginning.
global.getOperation()->moveBefore(&module.front());
globals[op.getValue()] = global;
});
}
} // namespace

namespace {
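// Rewrites a tensor-valued std.constant into a get_global_memref of the
// global created by GlobalCreator; the bufferization materializations then
// insert tensor_load where tensor values are still expected (see the tests
// below).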
class BufferizeTensorConstantOp : public OpConversionPattern<ConstantOp> {
public:
BufferizeTensorConstantOp(GlobalCreator &globals,
TypeConverter &typeConverter, MLIRContext *context)
: OpConversionPattern<ConstantOp>(typeConverter, context, /*benefit=*/1),
globals(globals) {}

LogicalResult
matchAndRewrite(ConstantOp op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
auto type = op.getType().dyn_cast<RankedTensorType>();
if (!type)
return failure();

auto globalMemref = globals.getGlobalFor(op.value());
rewriter.replaceOpWithNewOp<GetGlobalMemrefOp>(op, globalMemref.type(),
globalMemref.getName());
return success();
}
GlobalCreator &globals;
};
} // namespace

namespace {
struct TensorConstantBufferizePass
: public TensorConstantBufferizeBase<TensorConstantBufferizePass> {
void runOnOperation() override {
auto module = getOperation();
GlobalCreator globals(module);

auto *context = &getContext();
BufferizeTypeConverter typeConverter;
OwningRewritePatternList patterns;
ConversionTarget target(*context);

target.addLegalDialect<StandardOpsDialect>();
patterns.insert<BufferizeTensorConstantOp>(globals, typeConverter, context);
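// A constant is legal iff its result type needs no conversion, i.e. all
// tensor-valued constants are illegal and must be rewritten.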
target.addDynamicallyLegalOp<ConstantOp>(
[&](ConstantOp op) { return typeConverter.isLegal(op.getType()); });
if (failed(applyPartialConversion(module, target, std::move(patterns))))
signalPassFailure();
}
};
} // namespace

std::unique_ptr<Pass> mlir::createTensorConstantBufferizePass() {
return std::make_unique<TensorConstantBufferizePass>();
}
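As an end-to-end sketch (mirroring the new test file below; the global's name
is derived from the constant's shape and element type and is not
load-bearing), the pass rewrites

  %0 = constant dense<7.0> : tensor<3x4xf32>

into roughly

  global_memref "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00>
  ...
  %0 = get_global_memref @__constant_3x4xf32 : memref<3x4xf32>
  %1 = tensor_load %0 : memref<3x4xf32>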
39 changes: 17 additions & 22 deletions mlir/test/Dialect/Linalg/bufferize.mlir
@@ -94,24 +94,6 @@ func @dynamic_results(%arg0: tensor<?x?xf32>)

// -----

// Check lowering of tensor-valued std.constant's
// TODO: Move this to std-bufferize.

// CHECK-LABEL: func @constant() -> tensor<2x3xf32> {
// CHECK: %[[VECTOR_MEMREF:.*]] = alloc() : memref<vector<6xf32>>
// CHECK: %[[VECTOR_CONST:.*]] = constant dense<[0.000000e+00, 1.000000e+00, 2.000000e+00, 3.000000e+00, 4.000000e+00, 5.000000e+00]> : vector<6xf32>
// CHECK: store %[[VECTOR_CONST]], %[[VECTOR_MEMREF]][] : memref<vector<6xf32>>
// CHECK: %[[MEMREF:.*]] = vector.type_cast %[[VECTOR_MEMREF]] : memref<vector<6xf32>> to memref<6xf32>
// CHECK: %[[FINAL_SHAPE:.*]] = linalg.reshape %[[MEMREF]] [#map] : memref<6xf32> into memref<2x3xf32>
// CHECK: %[[RESULT:.*]] = tensor_load %[[FINAL_SHAPE]] : memref<2x3xf32>
// CHECK: return %[[RESULT]] : tensor<2x3xf32>
func @constant() -> tensor<2x3xf32> {
%0 = constant dense<[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]> : tensor<2x3xf32>
return %0: tensor<2x3xf32>
}

// -----

#accesses = [
affine_map<(i, j, k) -> (j, i, k)>,
affine_map<(i, j, k) -> (i, j)>
@@ -199,20 +181,33 @@ func @bufferize_subtensor_insert(%t : tensor<?x?xf32>, %st0 : tensor<2x3xf32>, %
// CHECK: %[[IDX:.*]] = call @make_index() : () -> index
%i0 = call @make_index() : () -> index


// CHECK-DAG: %[[M0:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
// CHECK-DAG: %[[SM0:.*]] = tensor_to_memref %[[ST0]] : memref<2x3xf32>
// CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0]][0, 0] [2, 3] [1, 1]
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[DIM0:.*]] = dim %[[M0]], %[[C0]] : memref<?x?xf32>
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK-NEXT: %[[DIM1:.*]] = dim %[[M0]], %[[C1]] : memref<?x?xf32>
// CHECK-NEXT: %[[M0_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
// CHECK-NEXT: linalg.copy(%[[M0]], %[[M0_COPY]]) : memref<?x?xf32>, memref<?x?xf32>
// CHECK-NEXT: %[[SUBVIEW0:.*]] = subview %[[M0_COPY]][0, 0] [2, 3] [1, 1]
// CHECK-SAME: memref<?x?xf32> to memref<2x3xf32, #[[$MAP0]]>
// CHECK-NEXT: linalg.copy(%[[SM0]], %[[SUBVIEW0]]) : memref<2x3xf32>, memref<2x3xf32, #[[$MAP0]]>
// CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0]] : memref<?x?xf32>
// CHECK-NEXT: %[[RT0:.*]] = tensor_load %[[M0_COPY]] : memref<?x?xf32>
%t0 = subtensor_insert %st0 into %t[0, 0][2, 3][1, 1] : tensor<2x3xf32> into tensor<?x?xf32>

// CHECK-DAG: %[[M1:.*]] = tensor_to_memref %[[T]] : memref<?x?xf32>
// CHECK-DAG: %[[SM1:.*]] = tensor_to_memref %[[ST1]] : memref<2x?xf32>
// CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
// CHECK-NEXT: %[[C0:.*]] = constant 0 : index
// CHECK-NEXT: %[[DIM0:.*]] = dim %[[M1]], %[[C0]] : memref<?x?xf32>
// CHECK-NEXT: %[[C1:.*]] = constant 1 : index
// CHECK-NEXT: %[[DIM1:.*]] = dim %[[M1]], %[[C1]] : memref<?x?xf32>
// CHECK-NEXT: %[[M1_COPY:.*]] = alloc(%[[DIM0]], %[[DIM1]]) : memref<?x?xf32>
// CHECK-NEXT: linalg.copy(%[[M1]], %[[M1_COPY]]) : memref<?x?xf32>, memref<?x?xf32>
// CHECK-NEXT: %[[SUBVIEW1:.*]] = subview %[[M1_COPY]][0, %[[IDX]]] [2, %[[IDX]]] [1, 2]
// CHECK-SAME: memref<?x?xf32> to memref<2x?xf32, #[[$MAP1]]>
// CHECK-NEXT: linalg.copy(%[[SM1]], %[[SUBVIEW1]]) : memref<2x?xf32>, memref<2x?xf32, #[[$MAP1]]>
// CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1]] : memref<?x?xf32>
// CHECK-NEXT: %[[RT1:.*]] = tensor_load %[[M1_COPY]] : memref<?x?xf32>
%t1 = subtensor_insert %st1 into %t[0, %i0][2, %i0][1, 2] : tensor<2x?xf32> into tensor<?x?xf32>

// CHECK: return %[[RT0]], %[[RT1]]
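The updated CHECK lines above reflect that subtensor_insert now bufferizes by
copying the destination memref before writing into it: writing through a
subview of the original buffer would mutate the memory backing the source
tensor, which is incorrect if that tensor has other uses. A minimal sketch of
the new lowering shape (dimension values and names are illustrative):

  %copy = alloc(%d0, %d1) : memref<?x?xf32>
  linalg.copy(%src, %copy) : memref<?x?xf32>, memref<?x?xf32>
  %view = subview %copy[0, 0] [2, 3] [1, 1] : memref<?x?xf32> to memref<2x3xf32, #map>
  linalg.copy(%st, %view) : memref<2x3xf32>, memref<2x3xf32, #map>
  %res = tensor_load %copy : memref<?x?xf32>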
59 changes: 59 additions & 0 deletions mlir/test/Dialect/Standard/tensor-constant-bufferize.mlir
@@ -0,0 +1,59 @@
// RUN: mlir-opt %s -tensor-constant-bufferize -split-input-file | FileCheck %s

// CHECK-LABEL: module {
// We check the debug name too since we put some effort into making that readable.
// The name isn't load-bearing though.
// CHECK: global_memref "private" constant @__constant_3x4xf32 : memref<3x4xf32> = dense<7.000000e+00>
// CHECK: @basic
func @basic() -> tensor<3x4xf32> {
// CHECK: %[[MEMREF:.*]] = get_global_memref @__constant_3x4xf32 : memref<3x4xf32>
// CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF]]
%0 = constant dense<7.0> : tensor<3x4xf32>
// CHECK: return %[[TENSOR]]
return %0 : tensor<3x4xf32>
}

// CHECK: }

// -----

// CHECK-LABEL: module {

// Only one global is created.
// CHECK: global_memref
// CHECK-NOT: global_memref
func @duplicate_constants() -> (tensor<3x4xf32>, tensor<3x4xf32>) {
%0 = constant dense<7.0> : tensor<3x4xf32>
%1 = constant dense<7.0> : tensor<3x4xf32>
return %0, %1 : tensor<3x4xf32>, tensor<3x4xf32>
}

// CHECK: }

// -----

// CHECK-LABEL: module {

// Two globals are created.
// CHECK: global_memref
// CHECK: global_memref
// CHECK-NOT: global_memref
func @multiple_constants() -> (tensor<3x4xf32>, tensor<3x4xf32>) {
%0 = constant dense<7.0> : tensor<3x4xf32>
%1 = constant dense<8.0> : tensor<3x4xf32>
return %0, %1 : tensor<3x4xf32>, tensor<3x4xf32>
}

// CHECK: }

// -----

// CHECK-LABEL: module {
// We don't convert non-tensor globals.
// CHECK-NOT: global_memref
func @non_tensor() {
%0 = constant 7 : i32
return
}

// CHECK: }
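For reference, the RUN line above exercises the pass in isolation; an
equivalent manual invocation (assuming mlir-opt is built with this pass
registered) would be:

  mlir-opt tensor-constant-bufferize.mlir -tensor-constant-bufferize -split-input-file | FileCheck tensor-constant-bufferize.mlir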