Skip to content

Commit

Permalink
Fixes #162, changes meaning of '.', '^', '$' in RegExp multiline mode.
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Bartrum committed Oct 1, 2019
1 parent f734c96 commit 6c15623
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 4 deletions.
66 changes: 62 additions & 4 deletions Jurassic/Library/RegExp/RegExpInstance.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using System.Text.RegularExpressions;

namespace Jurassic.Library
Expand All @@ -14,7 +15,7 @@ public partial class RegExpInstance : ObjectInstance
{
private Regex value;
private bool globalSearch;

private string source;


// INITIALIZATION
Expand All @@ -37,7 +38,7 @@ internal RegExpInstance(ObjectInstance prototype, string pattern, string flags =

try
{
this.value = new Regex(pattern, ParseFlags(flags));
this.value = CreateRegex(pattern, ParseFlags(flags));
}
catch (ArgumentException ex)
{
Expand Down Expand Up @@ -132,7 +133,7 @@ public override string DebuggerDisplayType
[JSProperty(Name = "source")]
public string Source
{
get { return this.value.ToString(); }
get { return this.source ?? this.value.ToString(); }
}

/// <summary>
Expand Down Expand Up @@ -212,7 +213,7 @@ public int LastIndex
[JSInternalFunction(Deprecated = true, Name = "compile")]
public void Compile(string pattern, string flags = null)
{
this.value = new Regex(pattern, ParseFlags(flags) | RegexOptions.Compiled);
this.value = CreateRegex(pattern, ParseFlags(flags) | RegexOptions.Compiled);

// Update the javascript properties.
this.FastSetProperty("source", pattern);
Expand Down Expand Up @@ -592,5 +593,62 @@ private RegexOptions ParseFlags(string flags)
}
return options;
}

/// <summary>
/// Creates a .NET Regex object using the given pattern and options.  In multiline mode the
/// pattern is rewritten so that '.', '^' and '$' treat '\r' as a line terminator in the same
/// way the .NET implementation already treats '\n'.
/// </summary>
/// <param name="pattern"> The pattern string. </param>
/// <param name="options"> The regular expression options. </param>
/// <returns> A constructed .NET Regex object. </returns>
/// <remarks>
/// When the pattern had to be rewritten, the original text is remembered in the
/// <c>source</c> field; otherwise that field is reset to <c>null</c>.  The field is only
/// updated once the Regex has been constructed successfully, so a pattern that fails to
/// compile leaves the previously stored source untouched.
/// </remarks>
private Regex CreateRegex(string pattern, RegexOptions options)
{
    string originalPattern = pattern;
    bool rewritten = false;

    // A null pattern is left for the Regex constructor to reject (ArgumentNullException).
    if (pattern != null && (options & RegexOptions.Multiline) == RegexOptions.Multiline)
    {
        // In the .NET Regex implementation with multiline mode:
        // '.' matches any character except \n
        // '^' matches the start of the string or the position after \n
        // '$' matches the end of the string or the position before \n
        // In Javascript, all three must also treat \r the same way they treat \n.

        StringBuilder builder = null;       // Allocated lazily, only if something is rewritten.
        int copiedUpTo = 0;                 // Index of the first character not yet copied to builder.
        bool insideCharacterClass = false;  // True between an unescaped '[' and the next unescaped ']'.

        for (int i = 0; i < pattern.Length; i++)
        {
            char c = pattern[i];

            if (c == '\\')
            {
                // A backslash escapes exactly one following character; skipping the pair
                // keeps "\." literal while still rewriting the '.' in "\\." (an escaped
                // backslash followed by a real dot).
                i++;
                continue;
            }

            if (insideCharacterClass)
            {
                // Within [...] the characters '.', '^' and '$' are literals (or, for a
                // leading '^', the negation marker) and must be copied through unchanged.
                // NOTE(review): the first unescaped ']' is taken as the end of the class;
                // .NET treats a ']' placed directly after '[' or '[^' as a literal - confirm
                // whether such patterns need special handling.
                if (c == ']')
                    insideCharacterClass = false;
                continue;
            }

            string replacement;
            switch (c)
            {
                case '[':
                    insideCharacterClass = true;
                    continue;
                case '.':
                    replacement = @"[^\r\n]";
                    break;
                case '^':
                    replacement = @"(?<=^|\r)";
                    break;
                case '$':
                    replacement = @"(?=$|\r)";
                    break;
                default:
                    continue;
            }

            if (builder == null)
                builder = new StringBuilder(pattern.Length + 16);
            builder.Append(pattern, copiedUpTo, i - copiedUpTo);
            builder.Append(replacement);
            copiedUpTo = i + 1;
        }

        if (builder != null)
        {
            builder.Append(pattern, copiedUpTo, pattern.Length - copiedUpTo);
            pattern = builder.ToString();
            rewritten = true;
        }
    }

    Regex regex = new Regex(pattern, options);

    // Only remember the original text when it differs from what the Regex reports; clearing
    // it otherwise stops a stale source surviving from an earlier call.
    this.source = rewritten ? originalPattern : null;
    return regex;
}
}
}
9 changes: 9 additions & 0 deletions Unit Tests/Library/RegExpTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,15 @@ public void multiline()
Assert.AreEqual(false, Evaluate("/abc/.multiline"));
Assert.AreEqual(true, Evaluate("/abc/m.multiline"));

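// In multiline mode, '^' and '$' match at the start and end of every line as well as the start and end of the string,
// where a line can be terminated by either '\r' or '\n'; '.' matches any character except '\r' and '\n'.
// Note that the native .NET Regex treats only '\n', not '\r', as a line terminator for these characters.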
Assert.AreEqual("one,two", Evaluate(@"'one\ntwo'.match(/^.*$/mg).toString()"));
Assert.AreEqual("one,two,", Evaluate(@"'one\ntwo\n'.match(/^.*$/mg).toString()"));
Assert.AreEqual("one,,two", Evaluate(@"'one\r\ntwo'.match(/^.*$/mg).toString()"));
Assert.AreEqual("$", Evaluate(@"'one$two'.match(/\$/mg).toString()"));
Assert.AreEqual("^.*$", Evaluate(@"/^.*$/mg.source"));
Assert.AreEqual("one: two,one,two", Evaluate(@"'one: two\nthree: four'.match(/^([^:]+):\s*(.*)$/m).toString()"));

// This property is non-writable, configurable and non-enumerable.
Assert.AreEqual(false, Evaluate("var x = new RegExp('abc'); x.multiline = true; x.multiline"));
Assert.AreEqual(true, Evaluate("var x = new RegExp('abc'); x.multiline = true; delete x.multiline"));
Expand Down

0 comments on commit 6c15623

Please sign in to comment.