Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement replace regex using boost #6213

Open
wants to merge 9 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
25 changes: 25 additions & 0 deletions HelpSource/Classes/String.schelp
Expand Up @@ -383,6 +383,31 @@ code::
"[xtz]+nd".matchRegexp("xnd"); // true: any combination of x, t, z
::

method::replaceRegexp
Returns a new string in which every part of the receiver that matches a regular expression is replaced by the given replacement string.

argument::regex
A perl regular expression (see link::Classes/String#Regular expressions::) with which to match the caller.

argument::with
The link::Classes/String:: to replace the found regex with.

returns:: A link::Classes/String::.

code::
// remove numbers
"g8et t8ho9se 3num5b89ers ou06t o8f h12er56e!".replaceRegexp("[0-9]", "")
-> get those numbers out of here!

// convert capital letters to lower case
"HelLO WoRlD".replaceRegexp("(\\\w)", "\\\L$1")
-> hello world

// convert every capital letter to lower case unless it is at the start of a word
"HelLO worLD! I weNT tO Paris yeSTErDay.".replaceRegexp("(\\\S)(\\\S*)", "$1\\\L$2")
-> Hello world! I went to Paris yesterday.
::

method::findRegexp
Perl regular expression search (see link::Classes/String#Regular expressions::). This method searches exhaustively for matches and collects them into an array of pairs, in the format code::[character index, matching string]::.

Expand Down
1 change: 1 addition & 0 deletions SCClassLibrary/Common/Collections/String.sc
Expand Up @@ -130,6 +130,7 @@ String[char] : RawArray {
// Converts every item to a String with asString and hands the resulting
// collection to prFormat, answering whatever prFormat returns.
format { |... items|
	var itemStrings = items.collect { |item| item.asString };
	^this.prFormat(itemStrings)
}
// Binding for the _String_Format primitive; falls through to
// primitiveFailed when the primitive does not succeed.
prFormat { |items|
	_String_Format
	^this.primitiveFailed
}
// Binding for the _String_Regexp primitive. The receiver is the regular
// expression and `string` is the text tested against it; `start` defaults
// to 0 and `end` is left nil unless given. Falls through to primitiveFailed
// when the primitive does not succeed.
matchRegexp { |string, start = 0, end|
	_String_Regexp
	^this.primitiveFailed
}
// Binding for the _String_ReplaceRegex primitive: `regex` is the pattern
// searched for in the receiver and `with` is the replacement text.
// Falls through to primitiveFailed when the primitive does not succeed.
replaceRegexp { arg regex, with;
	_String_ReplaceRegex
	^this.primitiveFailed
}

fformat { arg ... args;
var str, resArgs, val, func;
Expand Down
57 changes: 57 additions & 0 deletions lang/LangPrimSource/PyrStringPrim.cpp
Expand Up @@ -269,6 +269,62 @@ class regex_lru_cache {

}


int prString_ReplaceRegex(struct VMGlobals* g, int numArgsPushed) {
// caches the last 64 boost:regex instances.
static detail::regex_lru_cache regex_lru_cache(boost::regex_constants::ECMAScript | boost::regex_constants::nosubs);


PyrSlot* slot_this = g->sp - 2; // source string
PyrSlot* slot_regex = g->sp - 1; // find
PyrSlot* slot_replace = g->sp; // replace with

// slot one does not need to be checked as this method should only be called from methods in String,
// or children thereof.
if (!isKindOfSlot(slot_regex, class_string)) {
SetNil(slot_this);
return errWrongType;
}
if (!isKindOfSlot(slot_replace, class_string)) {
SetNil(slot_this);
return errWrongType;
}

try {
const auto& pattern = regex_lru_cache.get_regex(slotRawString(slot_regex)->s, slotRawString(slot_regex)->size);

const char* source_start = slotRawString(slot_this)->s;
const int source_size = slotRawString(slot_this)->size;

if (source_size < 0) { // size is signed
SetNil(slot_this);
return errIntegerOverflow;
}

std::string out {};
// PyrStrings are not null terminated so a copy is needed.
const auto [replaceError, replace] = slotStrStdStrVal(slot_replace);
if (replaceError != errNone) {
SetNil(slot_this);
return replaceError;
}

boost::regex_replace(std::back_inserter(out), source_start, source_start + source_size, pattern, replace);

if (out.size() > std::numeric_limits<decltype(PyrObjectHdr {}.size)>::max()) {
SetNil(slot_this);
return errIntegerOverflow;
}
SetObject(slot_this, newPyrStringN(g->gc, static_cast<int>(out.size()), 0, true));
std::copy(out.begin(), out.end(), slotRawString(slot_this)->s);
return errNone;
} catch (const std::exception& e) {
postfl("Warning: Exception in _String_ReplaceRegex -%s\n", e.what());
SetNil(slot_this);
return errFailed;
};
}

int prString_Regexp(struct VMGlobals* g, int numArgsPushed) {
/* not reentrant */
static detail::regex_lru_cache regex_lru_cache(boost::regex_constants::ECMAScript | boost::regex_constants::nosubs);
Expand Down Expand Up @@ -1002,4 +1058,5 @@ void initStringPrimitives() {
definePrimitive(base, index++, "_String_EscapeChar", prString_EscapeChar, 2, 0);
definePrimitive(base, index++, "_String_ParseYAML", prString_ParseYAML, 1, 0);
definePrimitive(base, index++, "_String_ParseYAMLFile", prString_ParseYAMLFile, 1, 0);
definePrimitive(base, index++, "_String_ReplaceRegex", prString_ReplaceRegex, 3, 0);
}
1 change: 1 addition & 0 deletions lang/LangSource/PyrErrors.h
Expand Up @@ -39,6 +39,7 @@ enum { // primitive errors
errOutOfMemory,
errCantCallOS,
errException,
errIntegerOverflow,

errPropertyNotFound = 6000,

Expand Down
30 changes: 30 additions & 0 deletions testsuite/classlibrary/TestStringReplaceRegex.sc
@@ -0,0 +1,30 @@
// Unit tests for String:replaceRegexp.
TestStringReplaceRegex : UnitTest {

	// A plain literal pattern: each occurrence is swapped for the replacement.
	test_replace_simple {
		var replaced = "foo, foo, bar, foo".replaceRegexp("foo", "car");
		this.assertEquals(replaced, "car, car, bar, car");
	}

	// An empty receiver is expected to stay empty.
	test_replace_empty {
		var replaced = "".replaceRegexp("foo", "car");
		this.assertEquals(replaced, "");
	}

	// Character classes and alternation, replaced by the empty string.
	test_replace_more_complex {
		var withoutDigits, withoutRemove;

		withoutDigits = "texttextte9xtte823x234t".replaceRegexp("[0-9]", "");
		this.assertEquals(withoutDigits, "texttexttexttext");

		withoutRemove = "hello--Remove--rem0ve".replaceRegexp("(r|R)em(o|0)ve", "");
		this.assertEquals(withoutRemove, "hello----");
	}

	// A capture group referenced from the replacement together with a case conversion.
	test_replace_captures {
		var lowered = "HelLO WoRlD".replaceRegexp("(\\\w)", "\\\L$1");
		this.assertEquals(lowered, "hello world");
	}
}