Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix capturing group numbering bug and a few more regex-related issues #1614

Merged
merged 1 commit into from
Aug 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 0 additions & 6 deletions Jint.Tests.Test262/Test262Harness.settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,8 @@
"language/literals/regexp/named-groups/forward-reference.js",

// RegExp handling problems
"built-ins/RegExp/named-groups/non-unicode-match.js",
"built-ins/RegExp/named-groups/non-unicode-property-names-valid.js",
"built-ins/RegExp/named-groups/unicode-match.js",
"built-ins/RegExp/named-groups/unicode-property-names-valid.js",
"built-ins/RegExp/prototype/exec/S15.10.6.2_A1_T6.js",
"built-ins/String/prototype/split/separator-regexp.js",
"language/literals/regexp/u-case-mapping.js",
"language/literals/regexp/u-surrogate-pairs-atom-escape-decimal.js",

// requires investigation how to process complex function name evaluation for property
"built-ins/Function/prototype/toString/method-computed-property-name.js",
Expand Down
2 changes: 1 addition & 1 deletion Jint/Jint.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Esprima" Version="3.0.0-rc-03" />
<PackageReference Include="Esprima" Version="3.0.0-rc-04" />
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="all" />
<PackageReference Include="Nullable" Version="1.3.1" PrivateAssets="all" />
<PackageReference Include="PolySharp" Version="1.13.1">
Expand Down
3 changes: 3 additions & 0 deletions Jint/Native/RegExp/JsRegExp.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Text.RegularExpressions;
using Esprima;
using Jint.Native.Object;
using Jint.Runtime;
using Jint.Runtime.Descriptors;
Expand Down Expand Up @@ -62,6 +63,8 @@ public string Flags
}
}

public RegExpParseResult ParseResult { get; set; }

public bool DotAll { get; private set; }
public bool Global { get; private set; }
public bool Indices { get; private set; }
Expand Down
12 changes: 7 additions & 5 deletions Jint/Native/RegExp/RegExpConstructor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,14 +104,15 @@ private ObjectInstance RegExpInitialize(JsRegExp r, JsValue pattern, JsValue fla

try
{
var regExp = Scanner.AdaptRegExp(p, f, compiled: false, _engine.Options.Constraints.RegexTimeout);
var regExpParseResult = Scanner.AdaptRegExp(p, f, compiled: false, _engine.Options.Constraints.RegexTimeout);

if (regExp is null)
if (!regExpParseResult.Success)
{
ExceptionHelper.ThrowSyntaxError(_realm, $"Unsupported regular expression: '/{p}/{flags}'");
ExceptionHelper.ThrowSyntaxError(_realm, $"Unsupported regular expression. {regExpParseResult.ConversionError!.Description}");
}

r.Value = regExp;
r.Value = regExpParseResult.Regex!;
r.ParseResult = regExpParseResult;
}
catch (Exception ex)
{
Expand All @@ -135,12 +136,13 @@ private JsRegExp RegExpAlloc(JsValue newTarget)
return r;
}

public JsRegExp Construct(Regex regExp, string source, string flags)
public JsRegExp Construct(Regex regExp, string source, string flags, RegExpParseResult regExpParseResult = default)
{
var r = RegExpAlloc(this);
r.Value = regExp;
r.Source = source;
r.Flags = flags;
r.ParseResult = regExpParseResult;

RegExpInitialize(r);

Expand Down
80 changes: 32 additions & 48 deletions Jint/Native/RegExp/RegExpPrototype.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Diagnostics.CodeAnalysis;
using System.Text;
using System.Text.RegularExpressions;
using Jint.Collections;
using Jint.Native.Number;
Expand Down Expand Up @@ -168,16 +167,17 @@ private JsValue Replace(JsValue thisObject, JsValue[] arguments)
{
string Evaluator(Match match)
{
var replacerArgs = new List<JsValue>(match.Groups.Count + 2);
var actualGroupCount = GetActualRegexGroupCount(rei, match);
var replacerArgs = new List<JsValue>(actualGroupCount + 2);
replacerArgs.Add(match.Value);

ObjectInstance? groups = null;
for (var i = 1; i < match.Groups.Count; i++)
for (var i = 1; i < actualGroupCount; i++)
{
var capture = match.Groups[i];
replacerArgs.Add(capture.Success ? capture.Value : Undefined);

var groupName = GetRegexGroupName(rei.Value, i);
var groupName = GetRegexGroupName(rei, i);
if (!string.IsNullOrWhiteSpace(groupName))
{
groups ??= OrdinaryObjectCreate(_engine, null);
Expand Down Expand Up @@ -475,21 +475,13 @@ private JsValue Split(JsValue thisObject, JsValue[] arguments)
}

var a = _realm.Intrinsics.Array.Construct(Arguments.Empty);
var match = R.Value.Match(s, 0);

if (!match.Success) // No match at all return the string in an array
{
a.SetIndexValue(0, s, updateLength: true);
return a;
}

int lastIndex = 0;
uint index = 0;
while (match.Success && index < lim)
for (var match = R.Value.Match(s, 0); match.Success; match = match.NextMatch())
{
if (match.Length == 0 && (match.Index == 0 || match.Index == s.Length || match.Index == lastIndex))
{
match = match.NextMatch();
continue;
}

Expand All @@ -502,7 +494,8 @@ private JsValue Split(JsValue thisObject, JsValue[] arguments)
}

lastIndex = match.Index + match.Length;
for (int i = 1; i < match.Groups.Count; i++)
var actualGroupCount = GetActualRegexGroupCount(R, match);
for (int i = 1; i < actualGroupCount; i++)
{
var group = match.Groups[i];
var item = Undefined;
Expand All @@ -518,14 +511,11 @@ private JsValue Split(JsValue thisObject, JsValue[] arguments)
return a;
}
}

match = match.NextMatch();
if (!match.Success) // Add the last part of the split
{
a.SetIndexValue(index++, s.Substring(lastIndex), updateLength: true);
}
}

// Add the last part of the split
a.SetIndexValue(index, s.Substring(lastIndex), updateLength: true);

return a;
}

Expand Down Expand Up @@ -720,7 +710,7 @@ private JsValue Match(JsValue thisObject, JsValue[] arguments)
match = match.NextMatch();
if (!match.Success || match.Index != ++li)
break;
a.SetIndexValue(li, match.Value, updateLength: false);
a.SetIndexValue(li, match.Value, updateLength: false);
}
a.SetLength(li);
return a;
Expand Down Expand Up @@ -898,7 +888,7 @@ private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
return Null;
}

return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
return CreateReturnValueArray(R, m, s, fullUnicode: false, hasIndices: false);
}

// the stateful version
Expand Down Expand Up @@ -949,25 +939,26 @@ private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
R.Set(JsRegExp.PropertyLastIndex, e, true);
}

return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
return CreateReturnValueArray(R, match, s, fullUnicode, hasIndices);
}

private static JsArray CreateReturnValueArray(
Engine engine,
Regex regex,
JsRegExp rei,
Match match,
string s,
bool fullUnicode,
bool hasIndices)
{
var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) match.Groups.Count);
var engine = rei.Engine;
var actualGroupCount = GetActualRegexGroupCount(rei, match);
var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) actualGroupCount);
array.CreateDataProperty(PropertyIndex, match.Index);
array.CreateDataProperty(PropertyInput, s);

ObjectInstance? groups = null;
List<string>? groupNames = null;
var indices = hasIndices ? new List<JsNumber[]?>(match.Groups.Count) : null;
for (uint i = 0; i < match.Groups.Count; i++)
var indices = hasIndices ? new List<JsNumber[]?>(actualGroupCount) : null;
for (uint i = 0; i < actualGroupCount; i++)
{
var capture = match.Groups[(int) i];
var capturedValue = Undefined;
Expand All @@ -988,7 +979,7 @@ private static JsValue RegExpBuiltinExec(JsRegExp R, string s)
}
}

var groupName = GetRegexGroupName(regex, (int) i);
var groupName = GetRegexGroupName(rei, (int) i);
if (!string.IsNullOrWhiteSpace(groupName))
{
groups ??= OrdinaryObjectCreate(engine, null);
Expand Down Expand Up @@ -1055,35 +1046,28 @@ private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] mat
return engine.Realm.Intrinsics.Array.CreateArrayFromList(match);
}

private static string? GetRegexGroupName(Regex regex, int index)
private static int GetActualRegexGroupCount(JsRegExp rei, Match match)
{
return rei.ParseResult.Success ? rei.ParseResult.ActualRegexGroupCount : match.Groups.Count;
}

private static string? GetRegexGroupName(JsRegExp rei, int index)
{
if (index == 0)
{
return null;
}
var regex = rei.Value;
if (rei.ParseResult.Success)
{
return rei.ParseResult.GetRegexGroupName(index);
}

var groupNameFromNumber = regex.GroupNameFromNumber(index);
if (groupNameFromNumber.Length == 1 && groupNameFromNumber[0] == 48 + index)
{
// regex defaults to index as group name when it's not a named group
return null;

}

// The characters allowed in group names differs between the JS and .NET regex engines.
// For example the group name "$group" is valid in JS but invalid in .NET.
// As a workaround for this issue, the parser make an attempt to encode the problematic group names to
// names which are valid in .NET and probably won't collide with other group names present in the pattern
// (https://github.com/sebastienros/esprima-dotnet/blob/v3.0.0-rc-03/src/Esprima/Scanner.RegExpParser.cs#L942).
// We need to decode such group names.
const string encodedGroupNamePrefix = "__utf8_";
if (groupNameFromNumber.StartsWith(encodedGroupNamePrefix, StringComparison.Ordinal))
{
try
{
var bytes = groupNameFromNumber.AsSpan(encodedGroupNamePrefix.Length).BytesFromHexString();
groupNameFromNumber = Encoding.UTF8.GetString(bytes);
}
catch { /* intentional no-op */ }
}

return groupNameFromNumber;
Expand Down
7 changes: 4 additions & 3 deletions Jint/Runtime/Interpreter/Expressions/JintLiteralExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,13 @@ private JsValue ResolveValue(EvaluationContext context)
if (expression.TokenType == TokenType.RegularExpression)
{
var regExpLiteral = (RegExpLiteral) _expression;
if (regExpLiteral.Value is System.Text.RegularExpressions.Regex regex)
var regExpParseResult = regExpLiteral.ParseResult;
if (regExpParseResult.Success)
{
return context.Engine.Realm.Intrinsics.RegExp.Construct(regex, regExpLiteral.Regex.Pattern, regExpLiteral.Regex.Flags);
return context.Engine.Realm.Intrinsics.RegExp.Construct(regExpParseResult.Regex!, regExpLiteral.Regex.Pattern, regExpLiteral.Regex.Flags, regExpParseResult);
}

ExceptionHelper.ThrowSyntaxError(context.Engine.Realm, $"Unsupported regular expression: '{regExpLiteral.Regex.Pattern}/{regExpLiteral.Regex.Flags}'");
ExceptionHelper.ThrowSyntaxError(context.Engine.Realm, $"Unsupported regular expression. {regExpParseResult.ConversionError!.Description}");
}

return JsValue.FromObject(context.Engine, expression.Value);
Expand Down