diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp index 98a225dbec9f9..e26f9ef4c5310 100644 --- a/flang-rt/lib/runtime/character.cpp +++ b/flang-rt/lib/runtime/character.cpp @@ -570,6 +570,35 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x, } } +template +inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen, + const CHAR *set, std::size_t setLen, std::size_t pos, bool back, + const char *sourceFile, int sourceLine) { + Terminator terminator{sourceFile, sourceLine}; + + if (!back) { + RUNTIME_CHECK(terminator, pos <= xLen); + for (std::size_t i{pos + 1}; i <= xLen; ++i) { + for (std::size_t j{0}; j < setLen; ++j) { + if (x[i - 1] == set[j]) { + return i; + } + } + } + return xLen + 1; + } else { + RUNTIME_CHECK(terminator, pos >= 1 && pos <= xLen + 1); + for (std::size_t i{pos - 1}; i != 0; --i) { + for (std::size_t j{0}; j < setLen; ++j) { + if (x[i - 1] == set[j]) { + return i; + } + } + } + return 0; + } +} + extern "C" { RT_EXT_API_GROUP_BEGIN @@ -917,6 +946,24 @@ void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x, MaxMin(accumulator, x, sourceFile, sourceLine); } +std::size_t RTDEF(Split1)(const char *x, std::size_t xLen, const char *set, + std::size_t setLen, std::size_t pos, bool back, const char *sourceFile, + int sourceLine) { + return Split(x, xLen, set, setLen, pos, back, sourceFile, sourceLine); +} +std::size_t RTDEF(Split2)(const char16_t *x, std::size_t xLen, + const char16_t *set, std::size_t setLen, std::size_t pos, bool back, + const char *sourceFile, int sourceLine) { + return Split( + x, xLen, set, setLen, pos, back, sourceFile, sourceLine); +} +std::size_t RTDEF(Split4)(const char32_t *x, std::size_t xLen, + const char32_t *set, std::size_t setLen, std::size_t pos, bool back, + const char *sourceFile, int sourceLine) { + return Split( + x, xLen, set, setLen, pos, back, sourceFile, sourceLine); +} + RT_EXT_API_GROUP_END } } // namespace Fortran::runtime diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp index 2c7af27b9da77..4c2d8b3ecad5f 100644 --- a/flang-rt/unittests/Runtime/CharacterTest.cpp +++ b/flang-rt/unittests/Runtime/CharacterTest.cpp @@ -430,3 +430,53 @@ TYPED_TEST(RepeatTests, Repeat) { RunRepeatTest(t.ncopies, t.input, t.output); } } + +// Test SPLIT() +template +using SplitFunction = std::function; +using SplitFunctions = CharTypedFunctions; +template struct SplitTests : public ::testing::Test {}; +TYPED_TEST_SUITE(SplitTests, CharacterTypes, ); + +struct SplitTestCase { + const char *x, *y; + std::size_t pos; + bool back; + std::size_t expect; +}; + +template +void RunSplitTests(const std::vector &testCases, + const SplitFunction &function) { + for (const auto &t : testCases) { + // Convert default character to desired kind + std::size_t xLen{std::strlen(t.x)}, yLen{std::strlen(t.y)}; + std::basic_string x{t.x, t.x + xLen}; + std::basic_string y{t.y, t.y + yLen}; + auto got{function(x.data(), xLen, y.data(), yLen, t.pos, t.back, "", 0)}; + ASSERT_EQ(got, t.expect) + << "SPLIT('" << t.x << "','" << t.y << "',pos=" << t.pos + << ",back=" << t.back << ") for CHARACTER(kind=" << sizeof(CHAR) + << "): got " << got << ", expected " << t.expect; + } +} + +TYPED_TEST(SplitTests, Split) { + static SplitFunctions functions{ + RTNAME(Split1), RTNAME(Split2), RTNAME(Split4)}; + static std::vector testcases{ + {" one,last example,", ", ", 0, false, 1}, + {" one,last example,", ", ", 1, false, 5}, + {" one,last example,", ", ", 5, false, 10}, + {" one,last example,", ", ", 10, false, 18}, + {" one,last example,", ", ", 18, false, 19}, + {" one,last example,", ", ", 19, true, 18}, + {" one,last example,", ", ", 18, true, 10}, + {" one,last example,", ", ", 10, true, 5}, + {" one,last example,", ", ", 5, true, 1}, + {" one,last example,", ", ", 1, true, 0}, + }; + RunSplitTests( + testcases, std::get>(functions)); +} diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 320f913858956..8b539b164726c 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -436,6 +436,7 @@ struct IntrinsicLibrary { fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef); mlir::Value genSpacing(mlir::Type resultType, llvm::ArrayRef args); + void genSplit(llvm::ArrayRef); fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genStorageSize(mlir::Type, llvm::ArrayRef); diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h index 261ac348a4024..2ab0652ee0f7d 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h @@ -128,6 +128,14 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind, mlir::Value setBase, mlir::Value setLen, mlir::Value back); +/// Generate call to the split runtime routine that is specialized on +/// \param kind. +/// The \param kind represents the kind of the elements in the strings. +mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind, + mlir::Value stringBase, mlir::Value stringLen, + mlir::Value setBase, mlir::Value setLen, mlir::Value pos, + mlir::Value back); + } // namespace fir::runtime #endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CHARACTER_H diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h index dd47686fe858f..f9bbd6d727d4e 100644 --- a/flang/include/flang/Runtime/character.h +++ b/flang/include/flang/Runtime/character.h @@ -127,6 +127,16 @@ std::size_t RTDECL(Verify4)(const char32_t *, std::size_t, const char32_t *set, void RTDECL(Verify)(Descriptor &result, const Descriptor &string, const Descriptor &set, const Descriptor *back /*can be null*/, int kind, const char *sourceFile = nullptr, int sourceLine = 0); + +std::size_t RTDECL(Split1)(const char *, std::size_t, const char *set, + std::size_t, std::size_t, bool back = false, + const char *sourceFile = nullptr, int sourceLine = 0); +std::size_t RTDECL(Split2)(const char16_t *, std::size_t, const char16_t *set, + std::size_t, std::size_t, bool back = false, + const char *sourceFile = nullptr, int sourceLine = 0); +std::size_t RTDECL(Split4)(const char32_t *, std::size_t, const char32_t *set, + std::size_t, std::size_t, bool back = false, + const char *sourceFile = nullptr, int sourceLine = 0); } } // namespace Fortran::runtime #endif // FORTRAN_RUNTIME_CHARACTER_H_ diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index fe679da4ff98b..310c1a3cd3957 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -1725,6 +1725,10 @@ static const IntrinsicInterface intrinsicSubroutine[]{ {{"seconds", AnyInt, Rank::scalar, Optionality::required, common::Intent::In}}, {}, Rank::elemental, IntrinsicClass::impureSubroutine}, + {"split", + {{"string", SameCharNoLen}, {"set", SameCharNoLen}, {"pos", AnyInt}, + {"back", AnyLogical, Rank::elemental, Optionality::optional}}, + {}, Rank::elemental, IntrinsicClass::pureSubroutine}, {"unlink", {{"path", DefaultChar, Rank::scalar, Optionality::required, common::Intent::In}, diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 71d35e37bbe94..06aab4b3c466d 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -958,6 +958,12 @@ static constexpr IntrinsicHandler handlers[]{ /*isElemental=*/false}, {"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false}, {"spacing", &I::genSpacing}, + {"split", + &I::genSplit, + {{{"string", asAddr}, + {"set", asAddr}, + {"pos", asAddr}, + {"back", asValue, handleDynamicOptional}}}}, {"spread", &I::genSpread, {{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}}, @@ -8763,6 +8769,42 @@ mlir::Value IntrinsicLibrary::genSpacing(mlir::Type resultType, fir::runtime::genSpacing(builder, loc, fir::getBase(args[0]))); } +// SPLIT +void IntrinsicLibrary::genSplit(llvm::ArrayRef args) { + assert(args.size() == 4); + + // Handle required string base arg + mlir::Value stringBase = fir::getBase(args[0]); + + // Handle required set string base arg + mlir::Value setBase = fir::getBase(args[1]); + + // Handle kind argument; it is the kind of character in this case + fir::KindTy kind = + fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind( + stringBase.getType()); + + // Handle string length argument + mlir::Value stringLen = fir::getLen(args[0]); + + // Handle set string length argument + mlir::Value setLen = fir::getLen(args[1]); + + // Handle pos argument + mlir::Value posAddr = fir::getBase(args[2]); + mlir::Value pos = fir::LoadOp::create(builder, loc, posAddr); + + // Handle optional back argument + mlir::Value back = + isStaticallyAbsent(args[3]) + ? builder.createIntegerConstant(loc, builder.getI1Type(), 0) + : fir::getBase(args[3]); + + pos = fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen, + setBase, setLen, pos, back); + builder.createStoreWithConvert(loc, pos, posAddr); +} + // SPREAD fir::ExtendedValue IntrinsicLibrary::genSpread(mlir::Type resultType, diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp index 540ecba299dc3..6d4942e65d77a 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp @@ -290,3 +290,34 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder, stringLen, setBase, setLen, back); return fir::CallOp::create(builder, loc, func, args).getResult(0); } + +mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder, + mlir::Location loc, int kind, + mlir::Value stringBase, + mlir::Value stringLen, mlir::Value setBase, + mlir::Value setLen, mlir::Value pos, + mlir::Value back) { + mlir::func::FuncOp func; + switch (kind) { + case 1: + func = fir::runtime::getRuntimeFunc(loc, builder); + break; + case 2: + func = fir::runtime::getRuntimeFunc(loc, builder); + break; + case 4: + func = fir::runtime::getRuntimeFunc(loc, builder); + break; + default: + fir::emitFatalError( + loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4."); + } + auto fTy = func.getFunctionType(); + auto sourceFile = fir::factory::locationToFilename(builder, loc); + auto sourceLine = + fir::factory::locationToLineNo(builder, loc, fTy.getInput(7)); + auto args = fir::runtime::createArguments(builder, loc, fTy, stringBase, + stringLen, setBase, setLen, pos, + back, sourceFile, sourceLine); + return fir::CallOp::create(builder, loc, func, args).getResult(0); +} diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90 new file mode 100644 index 0000000000000..eecc767fa4ff2 --- /dev/null +++ b/flang/test/Lower/Intrinsics/split.f90 @@ -0,0 +1,74 @@ +! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s + +! CHECK-LABEL: func @_QPsplit_test1( +! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref{{.*}}) +subroutine split_test1(s1, s2, p) +character(*) :: s1, s2 +integer :: p +! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref>, index) +! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) +! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref +! CHECK: %false = arith.constant false +! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref>) -> !fir.ref +! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64 +! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref>) -> !fir.ref +! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64 +! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64 +! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %false, {{.*}}) {{.*}}: (!fir.ref, i64, !fir.ref, i64, i64, i1, !fir.ref, i32) -> i64 +! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32 +! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref +! CHECK: return +call split(s1, s2, p) +end subroutine split_test1 + +! CHECK-LABEL: func @_QPsplit_test2( +! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref{{.*}}) +subroutine split_test2(s1, s2, p) +character(*) :: s1, s2 +integer :: p +! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref>, index) +! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) +! CHECK: %true = arith.constant true +! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref +! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref>) -> !fir.ref +! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64 +! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref>) -> !fir.ref +! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64 +! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64 +! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %true, {{.*}}) {{.*}}: (!fir.ref, i64, !fir.ref, i64, i64, i1, !fir.ref, i32) -> i64 +! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32 +! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref +! CHECK: return +call split(s1, s2, p, .true.) +end subroutine split_test2 + +! CHECK-LABEL: func @_QPsplit_test3( +! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref{{.*}}, %[[back:[^:]+]]: !fir.ref>{{.*}}) +subroutine split_test3(s1, s2, p, back) +character(*) :: s1, s2 +integer :: p +logical, optional :: back +! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref>, index) +! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref>, index) +! CHECK: %[[is_present:.*]] = fir.is_present %arg3 : (!fir.ref>) -> i1 +! CHECK: %[[back_unwrap:.*]] = fir.if %[[is_present]] -> (!fir.logical<4>) { +! CHECK: {{.*}} = fir.load %arg3 : !fir.ref> +! CHECK: fir.result {{.*}} : !fir.logical<4> +! CHECK: } else { +! CHECK: %false = arith.constant false +! CHECK: {{.*}} = fir.convert %false : (i1) -> !fir.logical<4> +! CHECK: fir.result {{.*}} : !fir.logical<4> +! CHECK: } +! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref +! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref>) -> !fir.ref +! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64 +! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref>) -> !fir.ref +! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64 +! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64 +! CHECK: %[[back_convert:.*]] = fir.convert %[[back_unwrap]] : (!fir.logical<4>) -> i1 +! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %[[back_convert]], {{.*}}) {{.*}}: (!fir.ref, i64, !fir.ref, i64, i64, i1, !fir.ref, i32) -> i64 +! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32 +! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref +! CHECK: return +call split(s1, s2, p, back) +end subroutine split_test3 diff --git a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp index f3b0fde175bac..d2344ad64e810 100644 --- a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp +++ b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp @@ -209,3 +209,26 @@ TEST_F(RuntimeCallTest, genVerifyTest) { checkGenVerify(*firBuilder, "_FortranAVerify2", 2); checkGenVerify(*firBuilder, "_FortranAVerify4", 4); } + +void checkGenSplit( + fir::FirOpBuilder &builder, llvm::StringRef fctName, unsigned kind) { + auto loc = builder.getUnknownLoc(); + mlir::Type charTy = fir::CharacterType::get(builder.getContext(), kind, 10); + mlir::Type boxTy = fir::BoxType::get(charTy); + mlir::Type i32Ty = IntegerType::get(builder.getContext(), 32); + mlir::Value stringBase = fir::UndefOp::create(builder, loc, boxTy); + mlir::Value stringLen = fir::UndefOp::create(builder, loc, i32Ty); + mlir::Value setBase = fir::UndefOp::create(builder, loc, boxTy); + mlir::Value setLen = fir::UndefOp::create(builder, loc, i32Ty); + mlir::Value pos = fir::UndefOp::create(builder, loc, i32Ty); + mlir::Value back = fir::UndefOp::create(builder, loc, i32Ty); + mlir::Value res = fir::runtime::genSplit( + builder, loc, kind, stringBase, stringLen, setBase, setLen, pos, back); + checkCallOp(res.getDefiningOp(), fctName, 6); +} + +TEST_F(RuntimeCallTest, genSplitTest) { + checkGenSplit(*firBuilder, "_FortranASplit1", 1); + checkGenSplit(*firBuilder, "_FortranASplit2", 2); + checkGenSplit(*firBuilder, "_FortranASplit4", 4); +}