/
ArithToArmSME.cpp
125 lines (101 loc) · 4.76 KB
/
ArithToArmSME.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
//===- ArithToArmSME.cpp - Arith to ArmSME dialect conversion -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "mlir/Conversion/ArithToArmSME/ArithToArmSME.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/ArmSME/IR/ArmSME.h"
#include "mlir/Dialect/ArmSME/Utils/Utils.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
namespace mlir {
#define GEN_PASS_DEF_ARITHTOARMSMECONVERSIONPASS
#include "mlir/Conversion/Passes.h.inc"
} // namespace mlir
#define DEBUG_TYPE "arith-to-arm-sme"
using namespace mlir;
//===----------------------------------------------------------------------===//
// Conversion helpers
//===----------------------------------------------------------------------===//
/// Returns true if 'val' is a splat whose single value is zero for the given
/// element type, false otherwise (including when 'val' is null, is not a
/// splat, or 'elemType' is neither a float nor an integer type).
///
/// For floating-point element types only +0.0 counts as zero.
/// APFloat::isZero() is also true for -0.0, whose sign bit is set; the caller
/// in this file lowers zero splats to 'arm_sme.zero', and (assuming that op
/// produces an all-zero-bits tile) treating -0.0 as zero would silently drop
/// the sign. A -0.0 splat is therefore reported as non-zero.
static bool isSplatZero(Type elemType, DenseElementsAttr val) {
  // Common precondition for both element kinds: a non-null splat attribute.
  if (!val || !val.isSplat())
    return false;
  if (llvm::isa<FloatType>(elemType))
    return val.getSplatValue<APFloat>().isPosZero();
  if (llvm::isa<IntegerType>(elemType))
    return val.getSplatValue<APInt>().isZero();
  return false;
}
namespace {
//===----------------------------------------------------------------------===//
// ConstantOp
//===----------------------------------------------------------------------===//
/// Rewrite pattern for splat 'arith.constant' ops whose result type is
/// accepted by 'arm_sme::isValidSMETileVectorType'.
///
/// A zero splat is replaced by a single 'arm_sme.zero'. Any other splat is
/// materialised by writing a 1-D splat constant into every slice of a tile.
struct ConstantOpToArmSMELowering : public OpRewritePattern<arith::ConstantOp> {
  using OpRewritePattern<arith::ConstantOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(arith::ConstantOp constantOp,
                                PatternRewriter &rewriter) const final {
    // Bail out unless the constant produces a valid SME tile vector.
    auto tileTy = dyn_cast<VectorType>(constantOp.getType());
    if (!(tileTy && arm_sme::isValidSMETileVectorType(tileTy)))
      return failure();

    // Bail out unless the value is a dense splat.
    auto splatAttr = dyn_cast<DenseElementsAttr>(constantOp.getValueAttr());
    if (!(splatAttr && splatAttr.isSplat()))
      return failure();

    // 'arith.constant dense<0>' maps directly onto 'arm_sme.zero'.
    if (isSplatZero(tileTy.getElementType(), splatAttr)) {
      rewriter.replaceOpWithNewOp<arm_sme::ZeroOp>(constantOp, tileTy);
      return success();
    }

    // Non-zero splats: a tile slice is the largest unit that can be set at
    // once (outside of operations like the outerproduct), so build a 1-D splat
    // of the value and move it into each slice of the tile in a loop of
    // 'arm_sme.move_vector_to_tile_slice' ops.
    Location constLoc = constantOp.getLoc();
    VectorType sliceTy = VectorType::Builder(tileTy).dropDim(0);
    auto sliceSplat = DenseElementsAttr::get(
        sliceTy, splatAttr.getSplatValue<Attribute>());
    auto sliceConstant =
        rewriter.create<arith::ConstantOp>(constLoc, sliceSplat);
    auto startTile = rewriter.create<arm_sme::GetTileOp>(constLoc, tileTy);

    // Loop body: write the 1-D constant into the slice at 'sliceIdx' of the
    // tile carried into this iteration and yield the updated tile.
    auto writeSlice = [&](OpBuilder &builder, Location bodyLoc, Value sliceIdx,
                          Value tileSoFar) {
      return builder
          .create<arm_sme::MoveVectorToTileSliceOp>(
              bodyLoc, tileTy, sliceConstant, tileSoFar, sliceIdx)
          .getResult();
    };

    auto sliceLoop = mlir::arm_sme::createLoopOverTileSlices(
        rewriter, constLoc, startTile, writeSlice);
    rewriter.replaceOp(constantOp, sliceLoop.getResult(0));
    return success();
  }
};
} // namespace
//===----------------------------------------------------------------------===//
// Pattern population
//===----------------------------------------------------------------------===//
/// Adds the Arith-to-ArmSME rewrite patterns defined in this file (currently
/// only the 'arith.constant' lowering) to 'patterns'.
void mlir::arith::populateArithToArmSMEConversionPatterns(
    RewritePatternSet &patterns) {
  MLIRContext *context = patterns.getContext();
  patterns.add<ConstantOpToArmSMELowering>(context);
}
//===----------------------------------------------------------------------===//
// Pass definition
//===----------------------------------------------------------------------===//
namespace {
/// Pass that greedily applies the Arith-to-ArmSME conversion patterns to the
/// operation it is run on, and signals failure if the greedy driver fails.
struct ArithToArmSMEConversionPass final
    : impl::ArithToArmSMEConversionPassBase<ArithToArmSMEConversionPass> {
  // Inherit the constructors of the generated pass base.
  using impl::ArithToArmSMEConversionPassBase<
      ArithToArmSMEConversionPass>::ArithToArmSMEConversionPassBase;

  void runOnOperation() override {
    RewritePatternSet conversionPatterns(&getContext());
    arith::populateArithToArmSMEConversionPatterns(conversionPatterns);

    LogicalResult status = applyPatternsAndFoldGreedily(
        getOperation(), std::move(conversionPatterns));
    if (failed(status))
      signalPassFailure();
  }
};
} // namespace