[mlir][linalg] Fix padding size calculation for Conv2d ops.
This patch fixes the padding size calculation for Conv2d ops when the stride is greater than 1. It contains the following changes:

- Use addBound to add a constraint for AffineApplyOp in getUpperBoundForIndex, so that the result value can be mapped and retrieved later.

- Fix the bound derived from AffineMinOp by adding it as a closed bound. Originally the bound was added as an open upper bound, which results in incorrect bounds once the values are multiplied. For example:

```
%0 = affine.min affine_map<()[s0] -> (4, -s0 + 11)>()[%iv0]
%1 = affine.apply affine_map<()[s0] -> (s0 * 2)>()[%0]
```

If we add the affine.min as an open bound, addBound internally transforms it into the closed bound "%0 <= 3". getSliceBounds then derives the bound of %1 as "%1 <= 6" and returns the open bound "%1 < 7", while the correct bound is "%1 <= 8".

- In addition to addBound, change getSliceBounds to support returning closed upper bounds, since for size computation we usually care about closed bounds.

- Change getUpperBoundForIndex to favor constant bounds when required. getSliceBounds may return a tighter but non-constant bound, which cannot be used for padding. The constantRequired option makes getUpperBoundForIndex return a constant bound when possible and fail otherwise. A brief usage sketch follows below.
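
The following is an illustrative sketch only, adapted from the getUpperBoundForIndex changes in this patch; `constraints`, `minOp`, `getPosition`, and `value` are assumed to be set up as in that function:

```cpp
// Add the affine.min result as a *closed* upper bound (the core fix).
AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(),
                                              minOp.getOperands());
if (failed(constraints.addBound(IntegerPolyhedron::UB,
                                getPosition(minOp.getResult()), map,
                                /*isClosedBound=*/true)))
  return;

// Later, request *closed* upper bounds from getSliceBounds as well, so the
// derived size is not off by one after multiplications.
SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
constraints.getSliceBounds(getPosition(value), /*num=*/1, value.getContext(),
                           &lowerBounds, &upperBounds,
                           /*getClosedUB=*/true);
```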

Reviewed By: hanchung

Differential Revision: https://reviews.llvm.org/D124821
pzread authored and hanhanW committed May 9, 2022
1 parent e287d64 commit ad7c49b
Showing 5 changed files with 140 additions and 21 deletions.
23 changes: 22 additions & 1 deletion mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h
@@ -166,8 +166,25 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron {
/// bound map is expected to have exactly one result. In case of a LB/UB, the
/// bound map may have more than one result, for each of which an inequality
/// is added.
///
/// The bound can be added as open or closed by specifying isClosedBound. In
/// case of a LB/UB, isClosedBound = false means the bound is added internally
/// as a closed bound by +1/-1 respectively. In case of an EQ bound, it can
/// only be added as a closed bound.
///
/// Note: The dimensions/symbols of this FlatAffineConstraints must match the
/// dimensions/symbols of the affine map.
LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap,
bool isClosedBound);

/// Adds a bound for the identifier at the specified position with constraints
/// being drawn from the specified bound map. In case of an EQ bound, the
/// bound map is expected to have exactly one result. In case of a LB/UB, the
/// bound map may have more than one result, for each of which an inequality
/// is added.
/// Note: The dimensions/symbols of this FlatAffineConstraints must match the
/// dimensions/symbols of the affine map. By default the lower bound is closed
/// and the upper bound is open.
LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap);
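
A hypothetical illustration of the open/closed distinction documented above (not part of this header); assume `cst` has a single identifier x at position 0 and `ubMap` is the affine map ()[] -> (4):

```cpp
// Default two-argument overload: UBs are open, so this records x < 4,
// stored internally as x <= 3.
(void)cst.addBound(FlatAffineValueConstraints::BoundType::UB, /*pos=*/0, ubMap);
// New overload with isClosedBound = true: records x <= 4 directly.
(void)cst.addBound(FlatAffineValueConstraints::BoundType::UB, /*pos=*/0, ubMap,
                   /*isClosedBound=*/true);
```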

/// Adds a bound for the identifier at the specified position with constraints
@@ -197,9 +214,13 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron {
/// identifiers as floordiv's and mod's of affine expressions of other
/// identifiers with respect to (positive) constants. Sets bound map to a
/// null AffineMap if such a bound can't be found (or yet unimplemented).
///
/// By default the returned lower bounds are closed and upper bounds are open.
/// This can be changed by getClosedUB.
void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context,
SmallVectorImpl<AffineMap> *lbMaps,
SmallVectorImpl<AffineMap> *ubMaps);
SmallVectorImpl<AffineMap> *ubMaps,
bool getClosedUB = false);

/// Composes an affine map whose dimensions and symbols match one to one with
/// the dimensions and symbols of this FlatAffineConstraints. The results of
6 changes: 5 additions & 1 deletion mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -50,6 +50,9 @@ SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b);
/// values. The method sets `boundMap` to an affine map that given
/// `boundOperands` evaluates to an upper bound for the index computation.
///
/// If constantRequired is true, only returns the constant bounds (potentially
/// over-approximating) and fails when not possible.
///
/// Example:
/// ```
/// %dim0 = dim %tensor, %c0
@@ -62,7 +65,8 @@ SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b);
/// - boundMap = affine.map<(d0) -> (d0 + 40)>
/// - boundOperands = [%dim1]
void getUpperBoundForIndex(Value value, AffineMap &boundMap,
SmallVectorImpl<Value> &boundOperands);
SmallVectorImpl<Value> &boundOperands,
bool constantRequired = false);
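
A rough usage sketch of the new flag (hypothetical caller; it mirrors how getConstantUpperBoundForIndex invokes the function later in this patch):

```cpp
// Request a constant (possibly over-approximating) upper bound for `value`.
AffineMap boundMap;
SmallVector<Value> boundOperands;
getUpperBoundForIndex(value, boundMap, boundOperands,
                      /*constantRequired=*/true);
// On success, boundMap is a constant map such as affine_map<() -> (9)>;
// if no constant bound is found, boundMap stays the identity returning `value`.
```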

/// Returns a constant upper bound for the result `value` of an index
/// computation. Calls `getUpperBoundForIndex` and returns a constant upper
34 changes: 24 additions & 10 deletions mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -965,7 +965,8 @@ FlatAffineValueConstraints::getLowerAndUpperBound(
/// this process if needed.
void FlatAffineValueConstraints::getSliceBounds(
unsigned offset, unsigned num, MLIRContext *context,
SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps) {
SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps,
bool getClosedUB) {
assert(num < getNumDimIds() && "invalid range");

// Basic simplification.
@@ -1065,6 +1066,8 @@ void FlatAffineValueConstraints::getSliceBounds(
// again.
} while (changed);

int64_t ubAdjustment = getClosedUB ? 0 : 1;

// Set the lower and upper bound maps for all the identifiers that were
// computed as affine expressions of the rest as the "detected expr" and
// "detected expr + 1" respectively; set the undetected ones to null.
@@ -1081,7 +1084,7 @@

if (expr) {
lbMap = AffineMap::get(numMapDims, numMapSymbols, expr);
ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + 1);
ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + ubAdjustment);
} else {
// TODO: Whenever there are local identifiers in the dependence
// constraints, we'll conservatively over-approximate, since we don't
@@ -1118,9 +1121,10 @@
<< "WARNING: Potentially over-approximating slice ub\n");
auto ubConst = getConstantBound(BoundType::UB, pos + offset);
if (ubConst.hasValue()) {
(ubMap) = AffineMap::get(
numMapDims, numMapSymbols,
getAffineConstantExpr(ubConst.getValue() + 1, context));
ubMap =
AffineMap::get(numMapDims, numMapSymbols,
getAffineConstantExpr(
ubConst.getValue() + ubAdjustment, context));
}
}
}
@@ -1158,10 +1162,13 @@ LogicalResult FlatAffineValueConstraints::flattenAlignedMapAndMergeLocals(
}

LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
AffineMap boundMap) {
AffineMap boundMap,
bool isClosedBound) {
assert(boundMap.getNumDims() == getNumDimIds() && "dim mismatch");
assert(boundMap.getNumSymbols() == getNumSymbolIds() && "symbol mismatch");
assert(pos < getNumDimAndSymbolIds() && "invalid position");
assert((type != BoundType::EQ || isClosedBound) &&
"EQ bound must be closed.");

// Equality follows the logic of lower bound except that we add an equality
// instead of an inequality.
@@ -1193,17 +1200,24 @@ LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) {
ineq[j] = lower ? -flatExpr[i] : flatExpr[i];
}
// Make the bound closed in if flatExpr is open. The inequality is always
// created in the upper bound form, so the adjustment is -1.
int64_t boundAdjustment = (isClosedBound || type == BoundType::EQ) ? 0 : -1;
// Constant term.
ineq[getNumCols() - 1] =
lower ? -flatExpr[flatExpr.size() - 1]
// Upper bound in flattenedExpr is an exclusive one.
: flatExpr[flatExpr.size() - 1] - 1;
ineq[getNumCols() - 1] = (lower ? -flatExpr[flatExpr.size() - 1]
: flatExpr[flatExpr.size() - 1]) +
boundAdjustment;
type == BoundType::EQ ? addEquality(ineq) : addInequality(ineq);
}

return success();
}

LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos,
AffineMap boundMap) {
return addBound(type, pos, boundMap, /*isClosedBound=*/type != BoundType::UB);
}
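
A small worked example of the adjustment above (my own illustration, not part of the patch). Take an upper-bound map ()[] -> (4) on a variable x:

```cpp
// Two-argument overload => isClosedBound = false => boundAdjustment = -1:
//   stored inequality: -x + 3 >= 0, i.e. x <= 3 (the open bound x < 4).
// Three-argument overload with isClosedBound = true => boundAdjustment = 0:
//   stored inequality: -x + 4 >= 0, i.e. x <= 4.
```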

AffineMap
FlatAffineValueConstraints::computeAlignedMap(AffineMap map,
ValueRange operands) const {
35 changes: 26 additions & 9 deletions mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -177,7 +177,8 @@ SmallVector<Value, 4> getDynOperands(Location loc, Value val, OpBuilder &b) {
}

void getUpperBoundForIndex(Value value, AffineMap &boundMap,
SmallVectorImpl<Value> &boundOperands) {
SmallVectorImpl<Value> &boundOperands,
bool constantRequired) {
// Initialize `boundMap` and `boundOperands` to the identity returning
// `value`. This combination is the default result of the method if no
// simplification is possible.
@@ -226,11 +227,13 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
if (!(llvm::all_of(op->getResults(), findOrCreateId) &&
llvm::all_of(op->getOperands(), findOrCreateId)))
return;

// Add AffineApplyOps to the constraints.
if (auto applyOp = dyn_cast<AffineApplyOp>(op)) {
AffineValueMap valueMap(applyOp.getAffineMap(), applyOp.getOperands(),
applyOp.getResult());
if (failed(constraints.composeMap(&valueMap)))
AffineMap map = constraints.computeAlignedMap(applyOp.getAffineMap(),
applyOp.getOperands());
if (failed(constraints.addBound(IntegerPolyhedron::EQ,
getPosition(applyOp.getResult()), map)))
return;
continue;
}
@@ -239,17 +242,30 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap,
AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(),
minOp.getOperands());
if (failed(constraints.addBound(IntegerPolyhedron::UB,
getPosition(minOp.getResult()), map)))
getPosition(minOp.getResult()), map,
/*isClosedBound=*/true)))
return;
}

// Obtain an upper bound for the affine index computation by projecting out
// all temporary results and expressing the upper bound for `value` in terms
// of the terminals of the index computation.
SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
constraints.getSliceBounds(getPosition(value), 1, value.getContext(),
&lowerBounds, &upperBounds);
unsigned pos = getPosition(value);
if (constantRequired) {
auto ubConst = constraints.getConstantBound(
FlatAffineValueConstraints::BoundType::UB, pos);
if (!ubConst.hasValue())
return;

boundMap =
AffineMap::getConstantMap(ubConst.getValue(), value.getContext());
return;
}

SmallVector<AffineMap> lowerBounds(1), upperBounds(1);
constraints.getSliceBounds(pos, 1, value.getContext(), &lowerBounds,
&upperBounds,
/*getClosedUB=*/true);
// Verify `upperBounds[0]` is valid and has at least one result.
if (!upperBounds[0] || upperBounds[0].getNumResults() == 0)
return;
@@ -265,7 +281,8 @@ FailureOr<int64_t> getConstantUpperBoundForIndex(Value value) {
// Compute an upper bound for `value`.
AffineMap boundMap;
SmallVector<Value> boundOperands;
getUpperBoundForIndex(value, boundMap, boundOperands);
getUpperBoundForIndex(value, boundMap, boundOperands,
/*constantRequired=*/true);

// Search the results of `boundMap` for constant upper bounds.
SmallVector<int64_t> constantBounds;
63 changes: 63 additions & 0 deletions mlir/test/Dialect/Linalg/pad.mlir
@@ -3,6 +3,7 @@
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.fill pad padding-values=0.:f32,0.:f32 pack-paddings=0,1 padding-dimensions=0,1,2 run-enable-pass=false" -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1,2 pack-paddings=0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=FILL-MATMUL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32 pack-paddings=1,1,0 padding-dimensions=0,1,2 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=INPUTS-ONLY
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=0,1 pack-paddings=1,1,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=PARTIAL
// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.depthwise_conv_2d_nhwc_hwc pad padding-values=0.:f32,0.:f32,0.:f32 padding-dimensions=1,2 pack-paddings=1,0,1 run-enable-pass=false" -cse -split-input-file | FileCheck %s --check-prefix=DEPTHWISE_CONV_2D

// MATMUL-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 12, 7)>
// MATMUL-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 7)>
@@ -535,3 +536,65 @@ func.func @padding_the_output_dims_only(%arg0: tensor<24x12xf32>,
%5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [%0, %0] [1, 1] : tensor<?x?xf32> into tensor<24x25xf32>
func.return %5 : tensor<24x25xf32>
}

// -----

// DEPTHWISE_CONV_2D-DAG: #[[MAP0:[0-9a-z]+]] = affine_map<()[s0] -> (4, -s0 + 11)>
// DEPTHWISE_CONV_2D-DAG: #[[MAP1:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * 2)>
// DEPTHWISE_CONV_2D-DAG: #[[MAP2:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * 2 + 1)>
// DEPTHWISE_CONV_2D-DAG: #[[MAP3:[0-9a-z]+]] = affine_map<()[s0] -> (s0 * -2 + 8)>
// DEPTHWISE_CONV_2D-DAG: #[[MAP4:[0-9a-z]+]] = affine_map<()[s0] -> (-s0 + 4)>

#map0 = affine_map<()[s0] -> (4, -s0 + 11)>
#map1 = affine_map<()[s0] -> (s0 * 2)>
#map2 = affine_map<()[s0] -> (s0 * 2 + 1)>

// DEPTHWISE_CONV_2D: depthwise_conv_2d_padding
// DEPTHWISE_CONV_2D-SAME: %[[ARG0:[0-9a-zA-Z]*]]: tensor<1x23x3x16xf32>
// DEPTHWISE_CONV_2D-SAME: %[[ARG1:[0-9a-zA-Z]*]]: tensor<3x3x16xf32>
// DEPTHWISE_CONV_2D-SAME: %[[ARG2:[0-9a-zA-Z]*]]: tensor<1x13x1x16xf32>
// DEPTHWISE_CONV_2D-SAME: %[[IV0:[0-9a-zA-Z]*]]: index
func.func @depthwise_conv_2d_padding(%arg0: tensor<1x23x3x16xf32>,
%arg1: tensor<3x3x16xf32>,
%arg2: tensor<1x13x1x16xf32>,
%iv0: index) -> tensor<1x?x1x16xf32> {
// DEPTHWISE_CONV_2D-DAG: %[[CST:.*]] = arith.constant 0.
// DEPTHWISE_CONV_2D-DAG: %[[C0:.*]] = arith.constant 0 : index
// DEPTHWISE_CONV_2D-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[IV0]]]
%0 = affine.min #map0()[%iv0]
%1 = affine.apply #map1()[%iv0]
%2 = affine.apply #map2()[%0]

// DEPTHWISE_CONV_2D: %[[T3:.*]] = tensor.extract_slice %[[ARG0]]
// DEPTHWISE_CONV_2D: %[[T4:.*]] = tensor.extract_slice %[[ARG2]]
%3 = tensor.extract_slice %arg0[0, %1, 0, 0] [1, %2, 3, 16] [1, 1, 1, 1] : tensor<1x23x3x16xf32> to tensor<1x?x3x16xf32>
%4 = tensor.extract_slice %arg2[0, %iv0, 0, 0] [1, %0, 1, 16] [1, 1, 1, 1] : tensor<1x13x1x16xf32> to tensor<1x?x1x16xf32>

// Check the padding on the input.
// DEPTHWISE_CONV_2D: %[[T5:.*]] = affine.apply #[[MAP3]]()[%[[T0]]]
// DEPTHWISE_CONV_2D: %[[T6:.*]] = tensor.pad %[[T3]]
// DEPTHWISE_CONV_2D-SAME: low[%[[C0]], %[[C0]], %[[C0]], %[[C0]]]
// DEPTHWISE_CONV_2D-SAME: high[%[[C0]], %[[T5]], %[[C0]], %[[C0]]]
// DEPTHWISE_CONV_2D: tensor.yield %[[CST]] : f32

// Check the padding on the output.
// DEPTHWISE_CONV_2D: %[[T7:.*]] = affine.apply #[[MAP4]]()[%[[T0]]]
// DEPTHWISE_CONV_2D: %[[T8:.*]] = tensor.pad %[[T4]]
// DEPTHWISE_CONV_2D-SAME: low[%[[C0]], %[[C0]], %[[C0]], %[[C0]]]
// DEPTHWISE_CONV_2D-SAME: high[%[[C0]], %[[T7]], %[[C0]], %[[C0]]]
// DEPTHWISE_CONV_2D: tensor.yield %[[CST]] : f32

// DEPTHWISE_CONV_2D: %[[T9:.*]] = linalg.depthwise_conv_2d_nhwc_hwc
// DEPTHWISE_CONV_2D-SAME: ins(%[[T6]], %[[ARG1]] : tensor<1x9x3x16xf32>, tensor<3x3x16xf32>)
// DEPTHWISE_CONV_2D-SAME: outs(%[[T8]] : tensor<1x4x1x16xf32>)
%5 = linalg.depthwise_conv_2d_nhwc_hwc
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%3, %arg1 : tensor<1x?x3x16xf32>, tensor<3x3x16xf32>)
outs(%4 : tensor<1x?x1x16xf32>) -> tensor<1x?x1x16xf32>

// Check the extract_slice to crop the padded output before return.
// DEPTHWISE_CONV_2D: %[[T10:.*]] = tensor.extract_slice %[[T9]][0, 0, 0, 0]
// DEPTHWISE_CONV_2D-SAME: [1, %[[T0]], 1, 16]
// DEPTHWISE_CONV_2D: return %[[T10]] : tensor<1x?x1x16xf32>
return %5 : tensor<1x?x1x16xf32>
}
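
The padded tensor types in the CHECK lines above follow from the closed bounds (my own arithmetic, for illustration):

```
%0 = affine.min(4, 11 - iv0)   closed bound: %0 <= 4
%2 = 2 * %0 + 1                closed bound: %2 <= 9  => padded input  tensor<1x9x3x16xf32>
output slice size = %0         closed bound: %0 <= 4  => padded output tensor<1x4x1x16xf32>
```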
