diff --git a/HelpSource/Classes/String.schelp b/HelpSource/Classes/String.schelp
index 98be127e9c1..ada09a86f4b 100644
--- a/HelpSource/Classes/String.schelp
+++ b/HelpSource/Classes/String.schelp
@@ -383,6 +383,31 @@ code::
 "[xtz]+nd".matchRegexp("xnd"); // true: any combination of x, t, z
 ::
 
+method::replaceRegexp
+Replaces every part of the receiver that matches a regular expression and returns the result as a new string.
+
+argument::regex
+A perl regular expression (see link::Classes/String#Regular expressions::) to match against the receiver.
+
+argument::with
+The link::Classes/String:: that replaces each match. It may refer to capture groups of the regex (e.g. $1), as shown below.
+
+returns:: A new link::Classes/String:: with all matches replaced.
+
+code::
+// remove numbers
+"g8et t8ho9se 3num5b89ers ou06t o8f h12er56e!".replaceRegexp("[0-9]", "")
+-> get those numbers out of here!
+
+// convert all letters to lower case
+"HelLO WoRlD".replaceRegexp("(\\\w)", "\\\L$1")
+-> hello world
+
+// convert every letter to lower case, except the first character of each word
+"HelLO worLD! I weNT tO Paris yeSTErDay.".replaceRegexp("(\\\S)(\\\S*)", "$1\\\L$2")
+-> Hello world! I went to Paris yesterday.
+::
+
 method::findRegexp
 Perl regular expression search (see link::Classes/String#Regular expressions::). This method searches exhaustively for matches and collects them into an array of pairs, in the format code::[character index, matching string]::.
 
diff --git a/SCClassLibrary/Common/Collections/String.sc b/SCClassLibrary/Common/Collections/String.sc
index b9b9ecc7775..ca0e76b3929 100644
--- a/SCClassLibrary/Common/Collections/String.sc
+++ b/SCClassLibrary/Common/Collections/String.sc
@@ -130,6 +130,7 @@ String[char] : RawArray {
 	format { arg ... items; ^this.prFormat( items.collect(_.asString) ) }
 	prFormat { arg items; _String_Format ^this.primitiveFailed }
 	matchRegexp { arg string, start = 0, end; _String_Regexp ^this.primitiveFailed }
+	replaceRegexp { |regex, with| _String_ReplaceRegex ^this.primitiveFailed }
 
 	fformat { arg ... args;
 		var str, resArgs, val, func;
diff --git a/lang/LangPrimSource/PyrStringPrim.cpp b/lang/LangPrimSource/PyrStringPrim.cpp
index 11feb41577a..992cf267cd8 100644
--- a/lang/LangPrimSource/PyrStringPrim.cpp
+++ b/lang/LangPrimSource/PyrStringPrim.cpp
@@ -269,6 +269,67 @@ class regex_lru_cache {
 
 }
 
+
+// Primitive for String:replaceRegexp. Expects three slots on the stack: the receiver (source string),
+// the regex and the replacement format string. On success the receiver slot is replaced by a new
+// String holding the result; on any failure the receiver slot is set to nil and an error is returned.
+int prString_ReplaceRegex(struct VMGlobals* g, int numArgsPushed) {
+    // caches the last 64 boost:regex instances.
+    // Note: `nosubs` must not be set here, otherwise capture groups are not recorded and references
+    // such as `$1` in the replacement string would expand to nothing.
+    static detail::regex_lru_cache regex_lru_cache(boost::regex_constants::ECMAScript);
+
+    PyrSlot* slot_this = g->sp - 2; // source string
+    PyrSlot* slot_regex = g->sp - 1; // find
+    PyrSlot* slot_replace = g->sp; // replace with
+
+    // slot one does not need to be checked as this method should only be called from methods in String,
+    // or children thereof.
+    if (!isKindOfSlot(slot_regex, class_string)) {
+        SetNil(slot_this);
+        return errWrongType;
+    }
+    if (!isKindOfSlot(slot_replace, class_string)) {
+        SetNil(slot_this);
+        return errWrongType;
+    }
+
+    try {
+        const auto& pattern = regex_lru_cache.get_regex(slotRawString(slot_regex)->s, slotRawString(slot_regex)->size);
+
+        const char* source_start = slotRawString(slot_this)->s;
+        const int source_size = slotRawString(slot_this)->size;
+
+        if (source_size < 0) { // size is signed
+            SetNil(slot_this);
+            return errIntegerOverflow;
+        }
+
+        std::string out {};
+        // PyrStrings are not null terminated so a copy is needed.
+        const auto [replaceError, replace] = slotStrStdStrVal(slot_replace);
+        if (replaceError != errNone) {
+            SetNil(slot_this);
+            return replaceError;
+        }
+
+        boost::regex_replace(std::back_inserter(out), source_start, source_start + source_size, pattern, replace);
+
+        // The string size is handled as an int (see source_size above), so reject results that do not fit.
+        if (out.size() > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
+            SetNil(slot_this);
+            return errIntegerOverflow;
+        }
+        SetObject(slot_this, newPyrStringN(g->gc, static_cast<int>(out.size()), 0, true));
+        std::copy(out.begin(), out.end(), slotRawString(slot_this)->s);
+        return errNone;
+    } catch (const std::exception& e) {
+        postfl("Warning: Exception in _String_ReplaceRegex -%s\n", e.what());
+        SetNil(slot_this);
+        return errFailed;
+    }
+}
+
 int prString_Regexp(struct VMGlobals* g, int numArgsPushed) {
     /* not reentrant */
     static detail::regex_lru_cache regex_lru_cache(boost::regex_constants::ECMAScript | boost::regex_constants::nosubs);
@@ -1002,4 +1063,5 @@ void initStringPrimitives() {
     definePrimitive(base, index++, "_String_EscapeChar", prString_EscapeChar, 2, 0);
     definePrimitive(base, index++, "_String_ParseYAML", prString_ParseYAML, 1, 0);
     definePrimitive(base, index++, "_String_ParseYAMLFile", prString_ParseYAMLFile, 1, 0);
+    definePrimitive(base, index++, "_String_ReplaceRegex", prString_ReplaceRegex, 3, 0);
 }
diff --git a/lang/LangSource/PyrErrors.h b/lang/LangSource/PyrErrors.h
index 29dfd39b571..c70cfac9112 100644
--- a/lang/LangSource/PyrErrors.h
+++ b/lang/LangSource/PyrErrors.h
@@ -39,6 +39,7 @@ enum { // primitive errors
     errOutOfMemory,
     errCantCallOS,
     errException,
+    errIntegerOverflow,
 
     errPropertyNotFound = 6000,
 
diff --git a/testsuite/classlibrary/TestStringReplaceRegex.sc b/testsuite/classlibrary/TestStringReplaceRegex.sc
new file mode 100644
index 00000000000..4a0720cb4db
--- /dev/null
+++ b/testsuite/classlibrary/TestStringReplaceRegex.sc
@@ -0,0 +1,30 @@
+TestStringReplaceRegex : UnitTest {
+	test_replace_simple {
+		this.assertEquals(
+			"foo, foo, bar, foo".replaceRegexp("foo", "car"),
+			"car, car, bar, car"
+		);
+	}
+	test_replace_empty {
+		this.assertEquals(
+			"".replaceRegexp("foo", "car"),
+			""
+		);
+	}
+	test_replace_more_complex {
+		this.assertEquals(
+			"texttextte9xtte823x234t".replaceRegexp("[0-9]", ""),
+			"texttexttexttext"
+		);
+		this.assertEquals(
+			"hello--Remove--rem0ve".replaceRegexp("(r|R)em(o|0)ve", ""),
+			"hello----"
+		);
+	}
+	test_replace_captures {
+		this.assertEquals(
+			"HelLO WoRlD".replaceRegexp("(\\\w)", "\\\L$1"),
+			"hello world"
+		);
+	}
+}