-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[flang] implement split
for fortran 2023
#161484
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
split
for fortran 2023
@llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-flang-semantics Author: Connector Switch (c8ef) Changesref: F2023, 16.9.196 Full diff: https://github.com/llvm/llvm-project/pull/161484.diff 9 Files Affected:
diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp
index 98a225dbec9f9..64aa8a24c0e64 100644
--- a/flang-rt/lib/runtime/character.cpp
+++ b/flang-rt/lib/runtime/character.cpp
@@ -570,6 +570,30 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x,
}
}
+template <typename CHAR>
+inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen,
+ const CHAR *set, std::size_t setLen, std::size_t pos, bool back) {
+ if (!back) {
+ for (std::size_t i{pos + 1}; i <= xLen; ++i) {
+ for (std::size_t j{0}; j < setLen; ++j) {
+ if (x[i - 1] == set[j]) {
+ return i;
+ }
+ }
+ }
+ return xLen + 1;
+ } else {
+ for (std::size_t i{pos - 1}; i != 0; --i) {
+ for (std::size_t j{0}; j < setLen; ++j) {
+ if (x[i - 1] == set[j]) {
+ return i;
+ }
+ }
+ }
+ return 0;
+ }
+}
+
extern "C" {
RT_EXT_API_GROUP_BEGIN
@@ -917,6 +941,19 @@ void RTDEF(CharacterMin)(Descriptor &accumulator, const Descriptor &x,
MaxMin<true>(accumulator, x, sourceFile, sourceLine);
}
+std::size_t RTDEF(Split1)(const char *x, std::size_t xLen, const char *set,
+ std::size_t setLen, std::size_t pos, bool back) {
+ return Split<char>(x, xLen, set, setLen, pos, back);
+}
+std::size_t RTDEF(Split2)(const char16_t *x, std::size_t xLen,
+ const char16_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return Split<char16_t>(x, xLen, set, setLen, pos, back);
+}
+std::size_t RTDEF(Split4)(const char32_t *x, std::size_t xLen,
+ const char32_t *set, std::size_t setLen, std::size_t pos, bool back) {
+ return Split<char32_t>(x, xLen, set, setLen, pos, back);
+}
+
RT_EXT_API_GROUP_END
}
} // namespace Fortran::runtime
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index cd73798d71262..6c668af9adf2c 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -430,6 +430,7 @@ struct IntrinsicLibrary {
fir::ExtendedValue genSizeOf(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genSpacing(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args);
+ void genSplit(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genSpread(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genStorageSize(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Character.h b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
index d1c521de94438..a5ac3e175fe64 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Character.h
@@ -120,6 +120,14 @@ mlir::Value genVerify(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
mlir::Value setBase, mlir::Value setLen,
mlir::Value back);
+/// Generate call to the split runtime routine that is specialized on
+/// \param kind.
+/// The \param kind represents the kind of the elements in the strings.
+mlir::Value genSplit(fir::FirOpBuilder &builder, mlir::Location loc, int kind,
+ mlir::Value stringBase, mlir::Value stringLen,
+ mlir::Value setBase, mlir::Value setLen, mlir::Value pos,
+ mlir::Value back);
+
} // namespace fir::runtime
#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_CHARACTER_H
diff --git a/flang/include/flang/Runtime/character.h b/flang/include/flang/Runtime/character.h
index dd47686fe858f..2b94b3cc2d1d9 100644
--- a/flang/include/flang/Runtime/character.h
+++ b/flang/include/flang/Runtime/character.h
@@ -127,6 +127,13 @@ std::size_t RTDECL(Verify4)(const char32_t *, std::size_t, const char32_t *set,
void RTDECL(Verify)(Descriptor &result, const Descriptor &string,
const Descriptor &set, const Descriptor *back /*can be null*/, int kind,
const char *sourceFile = nullptr, int sourceLine = 0);
+
+std::size_t RTDECL(Split1)(const char *, std::size_t, const char *set,
+ std::size_t, std::size_t, bool back = false);
+std::size_t RTDECL(Split2)(const char16_t *, std::size_t, const char16_t *set,
+ std::size_t, std::size_t, bool back = false);
+std::size_t RTDECL(Split4)(const char32_t *, std::size_t, const char32_t *set,
+ std::size_t, std::size_t, bool back = false);
}
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_CHARACTER_H_
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index abe53c31210d0..c80778ccaa547 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -1721,6 +1721,10 @@ static const IntrinsicInterface intrinsicSubroutine[]{
{{"seconds", AnyInt, Rank::scalar, Optionality::required,
common::Intent::In}},
{}, Rank::elemental, IntrinsicClass::impureSubroutine},
+ {"split",
+ {{"string", SameCharNoLen}, {"set", SameCharNoLen}, {"pos", AnyInt},
+ {"back", AnyLogical, Rank::elemental, Optionality::optional}},
+ {}, Rank::elemental, IntrinsicClass::pureSubroutine},
{"unlink",
{{"path", DefaultChar, Rank::scalar, Optionality::required,
common::Intent::In},
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index e1c9520592de6..a9d2ccc1ff868 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -926,6 +926,12 @@ static constexpr IntrinsicHandler handlers[]{
/*isElemental=*/false},
{"sleep", &I::genSleep, {{{"seconds", asValue}}}, /*isElemental=*/false},
{"spacing", &I::genSpacing},
+ {"split",
+ &I::genSplit,
+ {{{"string", asAddr},
+ {"set", asAddr},
+ {"pos", asAddr},
+ {"back", asValue, handleDynamicOptional}}}},
{"spread",
&I::genSpread,
{{{"source", asBox}, {"dim", asValue}, {"ncopies", asValue}}},
@@ -8655,6 +8661,42 @@ mlir::Value IntrinsicLibrary::genSpacing(mlir::Type resultType,
fir::runtime::genSpacing(builder, loc, fir::getBase(args[0])));
}
+// SPLIT
+void IntrinsicLibrary::genSplit(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+
+ // Handle required string base arg
+ mlir::Value stringBase = fir::getBase(args[0]);
+
+ // Handle required set string base arg
+ mlir::Value setBase = fir::getBase(args[1]);
+
+ // Handle kind argument; it is the kind of character in this case
+ fir::KindTy kind =
+ fir::factory::CharacterExprHelper{builder, loc}.getCharacterKind(
+ stringBase.getType());
+
+ // Handle string length argument
+ mlir::Value stringLen = fir::getLen(args[0]);
+
+ // Handle set string length argument
+ mlir::Value setLen = fir::getLen(args[1]);
+
+ // Handle pos argument
+ mlir::Value posAddr = fir::getBase(args[2]);
+ mlir::Value pos = fir::LoadOp::create(builder, loc, posAddr);
+
+ // Handle optional back argument
+ mlir::Value back =
+ isStaticallyAbsent(args[3])
+ ? builder.createIntegerConstant(loc, builder.getI1Type(), 0)
+ : fir::getBase(args[3]);
+
+ pos = fir::runtime::genSplit(builder, loc, kind, stringBase, stringLen,
+ setBase, setLen, pos, back);
+ builder.createStoreWithConvert(loc, pos, posAddr);
+}
+
// SPREAD
fir::ExtendedValue
IntrinsicLibrary::genSpread(mlir::Type resultType,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Character.cpp b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
index 57fb0cccf6863..bd112082711d6 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Character.cpp
@@ -276,3 +276,30 @@ mlir::Value fir::runtime::genVerify(fir::FirOpBuilder &builder,
stringLen, setBase, setLen, back);
return fir::CallOp::create(builder, loc, func, args).getResult(0);
}
+
+mlir::Value fir::runtime::genSplit(fir::FirOpBuilder &builder,
+ mlir::Location loc, int kind,
+ mlir::Value stringBase,
+ mlir::Value stringLen, mlir::Value setBase,
+ mlir::Value setLen, mlir::Value pos,
+ mlir::Value back) {
+ mlir::func::FuncOp func;
+ switch (kind) {
+ case 1:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split1)>(loc, builder);
+ break;
+ case 2:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split2)>(loc, builder);
+ break;
+ case 4:
+ func = fir::runtime::getRuntimeFunc<mkRTKey(Split4)>(loc, builder);
+ break;
+ default:
+ fir::emitFatalError(
+ loc, "unsupported CHARACTER kind value. Runtime expects 1, 2, or 4.");
+ }
+ auto fTy = func.getFunctionType();
+ auto args = fir::runtime::createArguments(
+ builder, loc, fTy, stringBase, stringLen, setBase, setLen, pos, back);
+ return fir::CallOp::create(builder, loc, func, args).getResult(0);
+}
diff --git a/flang/test/Lower/Intrinsics/split.f90 b/flang/test/Lower/Intrinsics/split.f90
new file mode 100644
index 0000000000000..d0d2f48cddf16
--- /dev/null
+++ b/flang/test/Lower/Intrinsics/split.f90
@@ -0,0 +1,43 @@
+! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPsplit_test1(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test1(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %false = arith.constant false
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %false) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p)
+end subroutine split_test1
+
+! CHECK-LABEL: func @_QPsplit_test2(
+! CHECK-SAME: %[[s1:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[s2:[^:]+]]: !fir.boxchar<1>{{.*}}, %[[p:[^:]+]]: !fir.ref<i32>{{.*}})
+subroutine split_test2(s1, s2, p)
+character(*) :: s1, s2
+integer :: p
+! CHECK: %[[c1:.*]]:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[c2:.*]]:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %true = arith.constant true
+! CHECK: %[[pos:.*]] = fir.load %arg2 : !fir.ref<i32>
+! CHECK: %[[c1base:.*]] = fir.convert %[[c1]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c1len:.*]] = fir.convert %[[c1]]#1 : (index) -> i64
+! CHECK: %[[c2base:.*]] = fir.convert %[[c2]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! CHECK: %[[c2len:.*]] = fir.convert %[[c2]]#1 : (index) -> i64
+! CHECK: %[[pos1:.*]] = fir.convert %[[pos]] : (i32) -> i64
+! CHECK: %[[pos2:.*]] = fir.call @_FortranASplit1(%[[c1base]], %[[c1len]], %[[c2base]], %[[c2len]], %[[pos1]], %true) fastmath<contract> : (!fir.ref<i8>, i64, !fir.ref<i8>, i64, i64, i1) -> i64
+! CHECK: %[[pos3:.*]] = fir.convert %[[pos2]] : (i64) -> i32
+! CHECK: fir.store %[[pos3]] to %[[p]] : !fir.ref<i32>
+! CHECK: return
+call split(s1, s2, p, .true.)
+end subroutine split_test2
diff --git a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
index f3b0fde175bac..f8db671ad845c 100644
--- a/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
+++ b/flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp
@@ -209,3 +209,26 @@ TEST_F(RuntimeCallTest, genVerifyTest) {
checkGenVerify(*firBuilder, "_FortranAVerify2", 2);
checkGenVerify(*firBuilder, "_FortranAVerify4", 4);
}
+
+void checkGenSplit(
+ fir::FirOpBuilder &builder, llvm::StringRef fctName, unsigned kind) {
+ auto loc = builder.getUnknownLoc();
+ mlir::Type charTy = fir::CharacterType::get(builder.getContext(), kind, 10);
+ mlir::Type boxTy = fir::BoxType::get(charTy);
+ mlir::Type i32Ty = IntegerType::get(builder.getContext(), 32);
+ mlir::Value stringBase = fir::UndefOp::create(builder, loc, boxTy);
+ mlir::Value stringLen = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value setBase = fir::UndefOp::create(builder, loc, boxTy);
+ mlir::Value setLen = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value pos = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value back = fir::UndefOp::create(builder, loc, i32Ty);
+ mlir::Value res = fir::runtime::genSplit(
+ builder, loc, kind, stringBase, stringLen, setBase, setLen, pos, back);
+ checkCallOp(res.getDefiningOp(), fctName, 6, /*addLocArgs=*/false);
+}
+
+TEST_F(RuntimeCallTest, genSplitTest) {
+ checkGenSplit(*firBuilder, "_FortranASplit1", 1);
+ checkGenSplit(*firBuilder, "_FortranASplit2", 2);
+ checkGenSplit(*firBuilder, "_FortranASplit4", 4);
+}
|
flang-rt/lib/runtime/character.cpp
Outdated
@@ -570,6 +570,30 @@ static RT_API_ATTRS void MaxMin(Descriptor &accumulator, const Descriptor &x, | |||
} | |||
} | |||
|
|||
template <typename CHAR> | |||
inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen, | |||
const CHAR *set, std::size_t setLen, std::size_t pos, bool back) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't it better to pass pos
as a signed integer and verify that it satisfies the requirements specified in the standard?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the review! I have added the runtime check for pos
as specified in the standard.
BTW, I have kept the pos
's type as size_t. If a negative value is passed, it will wrap up to a large unsigned value and still be functional under the current check. If a signed type is preferred, I can make that change.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would prefer to avoid relying on overflow behaviour here as it can be easily avoided.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tried it, but it seems that comparing xLen and pos is unavoidable. If pos is signed, we would need to use many static_cast
s, which is not as clean as the current implementation. Since unsigned integer overflow is not UB, perhaps this could be a viable solution?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
size_t
is the correct type to use for character lengths. int64_t
is the correct type to use for Fortran INTEGER
values.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for your work so far on this.
Please could you add unit tests for the actual runtime function (in flang-rt/unittesets/Runtime).
(I will be on holiday next week, to avoid blocking the PR I'm happy for the other reviewers to verify my comments are addressed and for you to merge with their approval)
Done. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the updates. LGTM but please wait for a second review
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In Flang/docs/F202X.md there is a means of implementing SPLIT
in terms of SCAN
. Perhaps you could make SPLIT
faster than SCAN
, but if not, a second implementation of the same algorithm is redundant; they should be combined, or SPLIT
should be a wrapper around SCAN
.
Documentation is missing in flang/docs/Intrinsics.md.
if (!back) { | ||
RUNTIME_CHECK(terminator, pos <= xLen); | ||
for (std::size_t i{pos + 1}; i <= xLen; ++i) { | ||
for (std::size_t j{0}; j < setLen; ++j) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are O(n^2) implementations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, it has the same time complexity as the fallback implementations for scan/verify.
I noticed that the specialized version of scan/verify for char is not being used here. I'm not sure if this is intentional or accidental. If it's accidental, #161767 will partially address this.
|
||
template <typename CHAR> | ||
inline RT_API_ATTRS std::size_t Split(const CHAR *x, std::size_t xLen, | ||
const CHAR *set, std::size_t setLen, std::size_t pos, bool back, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
std::int64_t
is the correct type to use for Fortran INTEGER
argument values.
I tried to create a generalized |
My point was that |
ref: F2023, 16.9.196