Permalink
Browse files

Make RegExp character class match JSC.

  • Loading branch information...
1 parent 72f5bcb commit 94bb378ee558ed52411894754a0e1851fc243ed8 lrn@chromium.org committed Dec 13, 2010
Showing with 37 additions and 11 deletions.
  1. +25 −11 src/parser.cc
  2. +12 −0 test/mjsunit/regexp.js
View
36 src/parser.cc
@@ -4449,6 +4449,22 @@ CharacterRange RegExpParser::ParseClassAtom(uc16* char_class) {
}
+static const uc16 kNoCharClass = 0;
+
+// Adds range or pre-defined character class to character ranges.
+// If char_class is not kInvalidClass, it's interpreted as a class
+// escape (i.e., 's' means whitespace, from '\s').
+static inline void AddRangeOrEscape(ZoneList<CharacterRange>* ranges,
+ uc16 char_class,
+ CharacterRange range) {
+ if (char_class != kNoCharClass) {
+ CharacterRange::AddClassEscape(char_class, ranges);
+ } else {
+ ranges->Add(range);
+ }
+}
+
+
RegExpTree* RegExpParser::ParseCharacterClass() {
static const char* kUnterminated = "Unterminated character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
@@ -4462,36 +4478,34 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
}
ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
while (has_more() && current() != ']') {
- uc16 char_class = 0;
+ uc16 char_class = kNoCharClass;
CharacterRange first = ParseClassAtom(&char_class CHECK_FAILED);
- if (char_class) {
- CharacterRange::AddClassEscape(char_class, ranges);
- continue;
- }
if (current() == '-') {
Advance();
if (current() == kEndMarker) {
// If we reach the end we break out of the loop and let the
// following code report an error.
break;
} else if (current() == ']') {
- ranges->Add(first);
+ AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
break;
}
- CharacterRange next = ParseClassAtom(&char_class CHECK_FAILED);
- if (char_class) {
- ranges->Add(first);
+ uc16 char_class_2 = kNoCharClass;
+ CharacterRange next = ParseClassAtom(&char_class_2 CHECK_FAILED);
+ if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
+ // Either end is an escaped character class. Treat the '-' verbatim.
+ AddRangeOrEscape(ranges, char_class, first);
ranges->Add(CharacterRange::Singleton('-'));
- CharacterRange::AddClassEscape(char_class, ranges);
+ AddRangeOrEscape(ranges, char_class_2, next);
continue;
}
if (first.from() > next.to()) {
return ReportError(CStrVector(kRangeOutOfOrder) CHECK_FAILED);
}
ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
- ranges->Add(first);
+ AddRangeOrEscape(ranges, char_class, first);
}
}
if (!has_more()) {
View
12 test/mjsunit/regexp.js
@@ -202,6 +202,17 @@ assertFalse(re.test('\n'));
assertFalse(re.test('a'));
assertFalse(re.test('Z'));
+// First - is treated as range operator, second as literal minus.
+// This follows the specification in parsing, but doesn't throw on
+// the \s at the beginning of the range.
+re = /[\s-0-9]/;
+assertTrue(re.test(' '));
+assertTrue(re.test('\xA0'));
+assertTrue(re.test('-'));
+assertTrue(re.test('0'));
+assertTrue(re.test('9'));
+assertFalse(re.test('1'));
+
// Test beginning and end of line assertions with or without the
// multiline flag.
re = /^\d+/;
@@ -647,3 +658,4 @@ assertEquals(4, re.exec("zimzamzumba").index);
assertEquals(["bc"], re.exec("zimzomzumbc"));
assertFalse(re.test("c"));
assertFalse(re.test(""));
+

0 comments on commit 94bb378

Please sign in to comment.