Skip to content

Commit edca510

Browse files
authored
[flang] Simplify hlfir.index in a few limited cases. (#157883)
The primary target is the case where the substring is a single character, which is simplified by inlining a search loop (except where the runtime function performs better). A few trivial simplifications are also covered.
1 parent 01a7c88 commit edca510

File tree

2 files changed

+553
-0
lines changed

2 files changed

+553
-0
lines changed

flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2284,6 +2284,213 @@ class CmpCharOpConversion : public mlir::OpRewritePattern<hlfir::CmpCharOp> {
22842284
}
22852285
};
22862286

2287+
static std::pair<mlir::Value, hlfir::AssociateOp>
2288+
getVariable(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value val) {
2289+
// If it is an expression - create a variable from it, or forward
2290+
// the value otherwise.
2291+
hlfir::AssociateOp associate;
2292+
if (!mlir::isa<hlfir::ExprType>(val.getType()))
2293+
return {val, associate};
2294+
hlfir::Entity entity{val};
2295+
mlir::NamedAttribute byRefAttr = fir::getAdaptToByRefAttr(builder);
2296+
associate = hlfir::genAssociateExpr(loc, builder, entity, entity.getType(),
2297+
"", byRefAttr);
2298+
return {associate.getBase(), associate};
2299+
}
2300+
2301+
/// Rewrite pattern that simplifies hlfir.index in a few limited cases and
/// reports a match failure (leaving the operation to the runtime call) in all
/// the others.
class IndexOpConversion : public mlir::OpRewritePattern<hlfir::IndexOp> {
public:
  using mlir::OpRewritePattern<hlfir::IndexOp>::OpRewritePattern;

  /// Try to replace \p op with inline code.
  ///
  /// Returns success when \p op has been replaced, and a match failure when
  /// the operation is left untouched.
  llvm::LogicalResult
  matchAndRewrite(hlfir::IndexOp op,
                  mlir::PatternRewriter &rewriter) const override {
    // We simplify only limited cases:
    //  1) the substring length shall be known at compile time;
    //  2) if the substring length is 0, replace with 1 for a forward search,
    //     or with the string length + 1 for a backward search (the builder
    //     shall const-fold if the lookup direction is known at compile time);
    //  3) if the string length is known at compile time and is shorter than
    //     the substring, replace with zero;
    //  4) if the substring length is one, inline a simple search loop;
    //  5) for a forward search with input strings of kind=1 the runtime is
    //     faster, so it is not inlined.
    // Do not simplify in all the other cases, relying on a runtime call.

    fir::FirOpBuilder builder{rewriter, op.getOperation()};
    mlir::Location loc = op->getLoc();

    auto resultTy = op.getType();
    mlir::Value back = op.getBack();
    // NOTE(review): genCharLength is called before the match-failure checks
    // below; if it materializes operations, they are left behind when the
    // pattern fails - confirm this is acceptable for the rewrite driver.
    mlir::Value substrLen =
        hlfir::genCharLength(loc, builder, hlfir::Entity{op.getSubstr()});

    auto substrLenCst = fir::getIntIfConstant(substrLen);
    if (!substrLenCst) {
      return rewriter.notifyMatchFailure(
          op, "substring length unknown at compile time");
    }
    mlir::Value strLen =
        hlfir::genCharLength(loc, builder, hlfir::Entity{op.getStr()});
    auto i1Ty = builder.getI1Type();
    auto idxTy = builder.getIndexType();
    if (*substrLenCst == 0) {
      mlir::Value oneIdx = builder.createIntegerConstant(loc, idxTy, 1);
      // Zero length substring. For a backward search replace with
      // strLen + 1, or otherwise with 1.
      mlir::Value strEnd = mlir::arith::AddIOp::create(
          builder, loc, builder.createConvert(loc, idxTy, strLen), oneIdx);
      if (back)
        back = builder.createConvert(loc, i1Ty, back);
      else
        back = builder.createIntegerConstant(loc, i1Ty, 0);
      mlir::Value result =
          mlir::arith::SelectOp::create(builder, loc, back, strEnd, oneIdx);

      rewriter.replaceOp(op, builder.createConvert(loc, resultTy, result));
      return mlir::success();
    }

    // The substring is not empty here, so a string that is known to be
    // shorter (which includes an empty string) cannot contain it.
    // This relies on constant lengths being non-negative.
    if (auto strLenCst = fir::getIntIfConstant(strLen)) {
      if (*strLenCst < *substrLenCst) {
        rewriter.replaceOp(op, builder.createIntegerConstant(loc, resultTy, 0));
        return mlir::success();
      }
    }
    if (*substrLenCst != 1) {
      return rewriter.notifyMatchFailure(
          op, "rely on runtime implementation if substring length > 1");
    }
    // For a forward search and character kind=1 the runtime uses memchr,
    // which is well optimized, but it looks like the memchr idiom is not
    // recognized in LLVM yet. On a micro-kernel test with strings of length 40
    // the runtime had ~2x less execution time vs the inlined code. When the
    // search direction is unknown at compile time, pessimistically assume
    // "forward".
    std::optional<bool> isBack;
    if (back) {
      if (auto backCst = fir::getIntIfConstant(back))
        isBack = *backCst != 0;
    } else {
      isBack = false;
    }
    auto charTy = mlir::cast<fir::CharacterType>(
        hlfir::getFortranElementType(op.getSubstr().getType()));
    unsigned kind = charTy.getFKind();
    if (kind == 1 && (!isBack || !*isBack)) {
      return rewriter.notifyMatchFailure(
          op, "rely on runtime implementation for character kind 1");
    }

    // All checks are passed here. Generate a single character search loop.
    auto [strV, strAssociate] = getVariable(builder, loc, op.getStr());
    auto [substrV, substrAssociate] = getVariable(builder, loc, op.getSubstr());
    hlfir::Entity str{strV};
    hlfir::Entity substr{substrV};
    mlir::Value oneIdx = builder.createIntegerConstant(loc, idxTy, 1);

    // Designate the one-character substring charStr(index:index), load it and
    // extract its code as an integer of the character's bit width.
    auto genExtractAndConvertToInt = [&charTy, &idxTy, &oneIdx,
                                      kind](mlir::Location loc,
                                            fir::FirOpBuilder &builder,
                                            hlfir::Entity &charStr,
                                            mlir::Value index) {
      auto bits = builder.getKindMap().getCharacterBitsize(kind);
      auto intTy = builder.getIntegerType(bits);
      auto charLen1Ty =
          fir::CharacterType::getSingleton(builder.getContext(), kind);
      // NOTE(review): volatility is queried on the substring's element type
      // for both operands rather than on charStr's own type - confirm this
      // is intended.
      mlir::Type designatorTy =
          fir::ReferenceType::get(charLen1Ty, fir::isa_volatile_type(charTy));
      auto idxAttr = builder.getIntegerAttr(idxTy, 0);

      auto singleChr = hlfir::DesignateOp::create(
          builder, loc, designatorTy, charStr, /*component=*/{},
          /*compShape=*/mlir::Value{}, hlfir::DesignateOp::Subscripts{},
          /*substring=*/mlir::ValueRange{index, index},
          /*complexPart=*/std::nullopt,
          /*shape=*/mlir::Value{}, /*typeParams=*/mlir::ValueRange{oneIdx},
          fir::FortranVariableFlagsAttr{});
      auto chrVal = fir::LoadOp::create(builder, loc, singleChr);
      mlir::Value intVal = fir::ExtractValueOp::create(
          builder, loc, intTy, chrVal, builder.getArrayAttr(idxAttr));
      return intVal;
    };

    auto wantChar = genExtractAndConvertToInt(loc, builder, substr, oneIdx);

    // Generate the search loop body with the following C equivalent:
    //   idx_t result = 0;
    //   idx_t end = strlen + 1;
    //   char want = substr[0];
    //   for (idx_t idx = 1; idx < end; ++idx) {
    //     if (result == 0) {
    //       idx_t at = back ? end - idx : idx;
    //       result = str[at-1] == want ? at : result;
    //     }
    //   }
    if (!back)
      back = builder.createIntegerConstant(loc, i1Ty, 0);
    else
      back = builder.createConvert(loc, i1Ty, back);
    mlir::Value strEnd = mlir::arith::AddIOp::create(
        builder, loc, builder.createConvert(loc, idxTy, strLen), oneIdx);
    mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
    auto genSearchBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
                             mlir::ValueRange index,
                             mlir::ValueRange reductionArgs)
        -> llvm::SmallVector<mlir::Value, 1> {
      assert(index.size() == 1 && "expected single loop");
      assert(reductionArgs.size() == 1 && "expected single reduction value");
      mlir::Value inRes = reductionArgs[0];
      // The loop has no early exit: once a match has been recorded
      // (result != 0) the remaining iterations just forward it.
      auto resEQzero = mlir::arith::CmpIOp::create(
          builder, loc, mlir::arith::CmpIPredicate::eq, inRes, zeroIdx);

      mlir::Value res =
          builder
              .genIfOp(loc, {idxTy}, resEQzero,
                       /*withElseRegion=*/true)
              .genThen([&]() {
                mlir::Value idx = builder.createConvert(loc, idxTy, index[0]);
                // offset = back ? end - idx : idx;
                mlir::Value offset = mlir::arith::SelectOp::create(
                    builder, loc, back,
                    mlir::arith::SubIOp::create(builder, loc, strEnd, idx),
                    idx);

                auto haveChar =
                    genExtractAndConvertToInt(loc, builder, str, offset);
                auto charsEQ = mlir::arith::CmpIOp::create(
                    builder, loc, mlir::arith::CmpIPredicate::eq, haveChar,
                    wantChar);
                mlir::Value newVal = mlir::arith::SelectOp::create(
                    builder, loc, charsEQ, offset, inRes);

                fir::ResultOp::create(builder, loc, newVal);
              })
              .genElse([&]() { fir::ResultOp::create(builder, loc, inRes); })
              .getResults()[0];
      return {res};
    };

    llvm::SmallVector<mlir::Value, 1> loopOut =
        hlfir::genLoopNestWithReductions(loc, builder, {strLen},
                                         /*reductionInits=*/{zeroIdx},
                                         genSearchBody,
                                         /*isUnordered=*/false);
    mlir::Value result = builder.createConvert(loc, resultTy, loopOut[0]);

    // Close the associations created by getVariable for expression operands.
    if (strAssociate)
      hlfir::EndAssociateOp::create(builder, loc, strAssociate);
    if (substrAssociate)
      hlfir::EndAssociateOp::create(builder, loc, substrAssociate);

    rewriter.replaceOp(op, result);
    return mlir::success();
  }
};
2493+
22872494
template <typename Op>
22882495
class MatmulConversion : public mlir::OpRewritePattern<Op> {
22892496
public:
@@ -2955,6 +3162,7 @@ class SimplifyHLFIRIntrinsics
29553162
patterns.insert<ArrayShiftConversion<hlfir::CShiftOp>>(context);
29563163
patterns.insert<ArrayShiftConversion<hlfir::EOShiftOp>>(context);
29573164
patterns.insert<CmpCharOpConversion>(context);
3165+
patterns.insert<IndexOpConversion>(context);
29583166
patterns.insert<MatmulConversion<hlfir::MatmulTransposeOp>>(context);
29593167
patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
29603168
patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);

0 commit comments

Comments
 (0)