Skip to content

Commit

Permalink
Implement RegExp match indices (#1310)
Browse files Browse the repository at this point in the history
  • Loading branch information
lahma committed Oct 2, 2022
1 parent d7b8ca1 commit 9fbed07
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Jint.Tests.Test262/Test262Harness.settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
"Promise.allSettled",
"Promise.any",
"regexp-duplicate-named-groups",
"regexp-match-indices",
"regexp-lookbehind",
"regexp-unicode-property-escapes",
"regexp-v-flag",
Expand Down Expand Up @@ -61,6 +60,7 @@
"language/literals/regexp/u-case-mapping.js",

// cannot have characters like 𝒜 as group name or something starting with $ in .NET, other .NET limitations
"built-ins/RegExp/match-indices/indices-array-unicode-property-names.js",
"built-ins/RegExp/named-groups/non-unicode-match.js",
"built-ins/RegExp/named-groups/non-unicode-property-names-valid.js",
"built-ins/RegExp/named-groups/non-unicode-property-names.js",
Expand Down
2 changes: 1 addition & 1 deletion Jint/Jint.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Esprima" Version="3.0.0-beta-7" />
<PackageReference Include="Esprima" Version="3.0.0-beta-8" />
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="all" />
<PackageReference Include="Nullable" Version="1.3.1" PrivateAssets="all" />
</ItemGroup>
Expand Down
16 changes: 15 additions & 1 deletion Jint/Native/Array/ArrayConstructor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ public ObjectInstance ArraySpeciesCreate(ObjectInstance originalArray, ulong len
return ((IConstructor) c).Construct(new JsValue[] { JsNumber.Create(length) }, c);
}

internal JsValue CreateArrayFromList(List<JsValue> values)
internal ArrayInstance CreateArrayFromList<T>(List<T> values) where T : JsValue
{
var jsArray = ArrayCreate((uint) values.Count);
var index = 0;
Expand All @@ -464,6 +464,20 @@ internal JsValue CreateArrayFromList(List<JsValue> values)
return jsArray;
}

/// <summary>
/// Creates a new array instance holding the given values in their original order.
/// </summary>
/// <param name="values">Values to copy into the new array; element <c>k</c> becomes index <c>k</c>.</param>
/// <returns>The newly created array whose length equals <c>values.Length</c>.</returns>
internal ArrayInstance CreateArrayFromList<T>(T[] values) where T : JsValue
{
    var count = (uint) values.Length;
    var result = ArrayCreate(count);

    for (uint position = 0; position < count; position++)
    {
        // length is written once after the loop instead of on every store
        result.SetIndexValue(position, values[position], false);
    }

    result.SetLength(count);
    return result;
}

private void ValidateLength(double length)
{
if (length < 0 || length > ArrayOperations.MaxArrayLikeLength || ((long) length) != length)
Expand Down
33 changes: 33 additions & 0 deletions Jint/Native/Array/ArrayInstance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,39 @@ IEnumerator IEnumerable.GetEnumerator()
return GetEnumerator();
}

/// <summary>
/// Appends a single value to the end of this array and updates its length.
/// </summary>
/// <param name="value">The value to store at the current length index.</param>
internal void Push(JsValue value)
{
    var oldLength = GetLength();
    var updatedLength = oldLength + 1;
    double targetIndex = oldLength;

    var descriptor = new PropertyDescriptor(value, PropertyFlag.ConfigurableEnumerableWritable);

    // write straight into the dense backing store when it already has room for the new slot
    var dense = _dense;
    if (dense != null && updatedLength <= dense.Length)
    {
        dense[(uint) targetIndex] = descriptor;
    }
    else
    {
        WriteValueSlow(targetIndex, descriptor);
    }

    // check if we can set length fast without breaking ECMA specification
    if (targetIndex < uint.MaxValue && CanSetLength())
    {
        _length!.Value = updatedLength;
        return;
    }

    // otherwise go through the regular property Set and report a failed write as a TypeError
    if (!Set(CommonProperties.Length, updatedLength))
    {
        ExceptionHelper.ThrowTypeError(_engine.Realm);
    }
}

internal uint Push(JsValue[] arguments)
{
var initialLength = GetLength();
Expand Down
8 changes: 8 additions & 0 deletions Jint/Native/RegExp/RegExpInstance.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ public string Flags
{
switch (c)
{
case 'd':
Indices = true;
break;
case 'i':
IgnoreCase = true;
break;
Expand All @@ -51,17 +54,22 @@ public string Flags
case 'u':
FullUnicode = true;
break;
case 'v':
UnicodeSets = true;
break;
}
}
}
}

public bool DotAll { get; private set; }
public bool Global { get; private set; }
public bool Indices { get; private set; }
public bool IgnoreCase { get; private set; }
public bool Multiline { get; private set; }
public bool Sticky { get; private set; }
public bool FullUnicode { get; private set; }
public bool UnicodeSets { get; private set; }

public override PropertyDescriptor GetOwnProperty(JsValue property)
{
Expand Down
105 changes: 91 additions & 14 deletions Jint/Native/RegExp/RegExpPrototype.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,22 @@ GetSetPropertyDescriptor CreateGetAccessorDescriptor(string name, Func<RegExpIns
}

const PropertyFlag propertyFlags = PropertyFlag.Configurable | PropertyFlag.Writable;
var properties = new PropertyDictionary(12, checkExistingKeys: false)
var properties = new PropertyDictionary(14, checkExistingKeys: false)
{
["constructor"] = new PropertyDescriptor(_constructor, propertyFlags),
["toString"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "toString", ToRegExpString, 0, lengthFlags), propertyFlags),
["exec"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "exec", _defaultExec, 1, lengthFlags), propertyFlags),
["test"] = new PropertyDescriptor(new ClrFunctionInstance(Engine, "test", Test, 1, lengthFlags), propertyFlags),
["dotAll"] = CreateGetAccessorDescriptor("get dotAll", r => r.DotAll),
["dotAll"] = CreateGetAccessorDescriptor("get dotAll", static r => r.DotAll),
["flags"] = new GetSetPropertyDescriptor(get: new ClrFunctionInstance(Engine, "get flags", Flags, 0, lengthFlags), set: Undefined, flags: PropertyFlag.Configurable),
["global"] = CreateGetAccessorDescriptor("get global", r => r.Global),
["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", r => r.IgnoreCase),
["multiline"] = CreateGetAccessorDescriptor("get multiline", r => r.Multiline),
["global"] = CreateGetAccessorDescriptor("get global", static r => r.Global),
["hasIndices"] = CreateGetAccessorDescriptor("get hasIndices", static r => r.Indices),
["ignoreCase"] = CreateGetAccessorDescriptor("get ignoreCase", static r => r.IgnoreCase),
["multiline"] = CreateGetAccessorDescriptor("get multiline", static r => r.Multiline),
["source"] = new GetSetPropertyDescriptor(get: new ClrFunctionInstance(Engine, "get source", Source, 0, lengthFlags), set: Undefined, flags: PropertyFlag.Configurable),
["sticky"] = CreateGetAccessorDescriptor("get sticky", r => r.Sticky),
["unicode"] = CreateGetAccessorDescriptor("get unicode", r => r.FullUnicode)
["sticky"] = CreateGetAccessorDescriptor("get sticky", static r => r.Sticky),
["unicode"] = CreateGetAccessorDescriptor("get unicode", static r => r.FullUnicode),
["unicodeSets"] = CreateGetAccessorDescriptor("get unicodeSets", static r => r.UnicodeSets)
};
SetProperties(properties);

Expand Down Expand Up @@ -589,11 +591,13 @@ static string AddFlagIfPresent(JsValue o, JsValue p, char flag, string s)
return TypeConverter.ToBoolean(o.Get(p)) ? s + flag : s;
}

var result = AddFlagIfPresent(r, PropertyGlobal, 'g', "");
var result = AddFlagIfPresent(r, "hasIndices", 'd', "");
result = AddFlagIfPresent(r, PropertyGlobal, 'g', result);
result = AddFlagIfPresent(r, "ignoreCase", 'i', result);
result = AddFlagIfPresent(r, "multiline", 'm', result);
result = AddFlagIfPresent(r, "dotAll", 's', result);
result = AddFlagIfPresent(r, "unicode", 'u', result);
result = AddFlagIfPresent(r, "unicodeSets", 'v', result);
result = AddFlagIfPresent(r, PropertySticky, 'y', result);

return result;
Expand Down Expand Up @@ -879,8 +883,9 @@ private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)

var matcher = R.Value;
var fullUnicode = R.FullUnicode;
var hasIndices = R.Indices;

if (!global & !sticky && !fullUnicode)
if (!global & !sticky && !fullUnicode && !hasIndices)
{
// we can use the non-stateful fast path, which is the common case
var m = matcher.Match(s, (int) lastIndex);
Expand All @@ -889,7 +894,7 @@ private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
return Null;
}

return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false);
return CreateReturnValueArray(R.Engine, matcher, m, s, fullUnicode: false, hasIndices: false);
}

// the stateful version
Expand Down Expand Up @@ -924,7 +929,7 @@ private static JsValue RegExpBuiltinExec(RegExpInstance R, string s)
R.Set(RegExpInstance.PropertyLastIndex, e, true);
}

return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode);
return CreateReturnValueArray(R.Engine, matcher, match, s, fullUnicode, hasIndices);
}

/// <summary>
Expand Down Expand Up @@ -956,37 +961,109 @@ private static int GetStringIndex(string s, int codePointIndex)
return len;
}

/// <summary>
/// Builds the array returned from a successful regular expression match: one element per
/// capture group plus the "index", "input", "groups" and (when requested) "indices" properties.
/// </summary>
/// <param name="engine">Engine whose realm supplies the array and object intrinsics.</param>
/// <param name="regex">The regular expression that produced <paramref name="match"/>; used to resolve group names.</param>
/// <param name="match">The successful match to convert.</param>
/// <param name="s">The input string that was matched.</param>
/// <param name="fullUnicode">Whether the expression has the unicode flag; not read by this method.</param>
/// <param name="hasIndices">Whether the expression has the <c>d</c> flag, i.e. the "indices" array must be produced.</param>
/// <returns>The populated match result array.</returns>
private static ArrayInstance CreateReturnValueArray(
    Engine engine,
    Regex regex,
    Match match,
    string s,
    bool fullUnicode,
    bool hasIndices)
{
    var groupCount = match.Groups.Count;
    var array = engine.Realm.Intrinsics.Array.ArrayCreate((ulong) groupCount);
    array.CreateDataProperty(PropertyIndex, match.Index);
    array.CreateDataProperty(PropertyInput, s);

    ObjectInstance? groups = null;

    // Both lists only feed the "indices" array, so they are not allocated unless it is requested.
    var indices = hasIndices ? new List<JsNumber[]?>(groupCount) : null;
    var groupNames = hasIndices ? new List<string>(groupCount) : null;

    for (uint i = 0; i < groupCount; i++)
    {
        var capture = match.Groups[(int) i];
        var capturedValue = Undefined;
        if (capture.Success)
        {
            capturedValue = capture.Value;
        }

        if (hasIndices)
        {
            if (capture.Success)
            {
                // [start, end) offsets of the capture within the input string
                indices!.Add(new[] { JsNumber.Create(capture.Index), JsNumber.Create(capture.Index + capture.Length) });
            }
            else
            {
                // group did not participate in the match
                indices!.Add(null);
            }
        }

        var groupName = GetRegexGroupName(regex, (int) i);
        var isNamed = false;
        if (!string.IsNullOrWhiteSpace(groupName))
        {
            groups ??= OrdinaryObjectCreate(engine, null);
            groups.CreateDataPropertyOrThrow(groupName, capturedValue);
            isNamed = true;
        }

        // MakeMatchIndicesIndexPairArray reads groupNames[i - 1] as the name of capture i, so every
        // capture after the whole-match group 0 needs exactly one entry. Unnamed groups get an empty
        // placeholder; otherwise names shift onto the wrong capture and the lookup can run past the
        // end of the list when named and unnamed groups are mixed.
        if (hasIndices && i > 0)
        {
            groupNames!.Add(isNamed ? groupName! : string.Empty);
        }

        array.SetIndexValue(i, capturedValue, updateLength: false);
    }

    array.CreateDataProperty(PropertyGroups, groups ?? Undefined);

    if (hasIndices)
    {
        // groups is only created once a named group was seen, so it doubles as the hasGroups flag
        var indicesArray = MakeMatchIndicesIndexPairArray(engine, s, indices!, groupNames, groups is not null);
        array.CreateDataPropertyOrThrow("indices", indicesArray);
    }

    return array;
}

/// <summary>
/// https://tc39.es/ecma262/#sec-makematchindicesindexpairarray
/// Creates the "indices" array of a match result: one entry per capture (an index pair array, or
/// undefined when <paramref name="indices"/> holds null for it) plus a "groups" property.
/// </summary>
/// <param name="engine">Engine whose realm supplies the array intrinsics.</param>
/// <param name="s">The matched input string; forwarded to <c>GetMatchIndexPair</c>.</param>
/// <param name="indices">Start/end pair per capture, null for captures that did not participate.</param>
/// <param name="groupNames">
/// Group names, read positionally: entry <c>i - 1</c> is used as the name of capture <c>i</c>, and
/// null/blank entries are treated as unnamed.
/// NOTE(review): this requires one entry per capture after group 0 — confirm the caller supplies that.
/// </param>
/// <param name="hasGroups">When true, "groups" is an object keyed by group name; otherwise undefined.</param>
/// <returns>The populated indices array.</returns>
private static ArrayInstance MakeMatchIndicesIndexPairArray(
    Engine engine,
    string s,
    List<JsNumber[]?> indices,
    List<string>? groupNames,
    bool hasGroups)
{
    var count = indices.Count;
    var result = engine.Realm.Intrinsics.Array.Construct((uint) count);

    ObjectInstance? namedPairs = hasGroups ? OrdinaryObjectCreate(engine, null) : null;
    result.CreateDataPropertyOrThrow("groups", namedPairs ?? Undefined);

    for (var i = 0; i < count; ++i)
    {
        JsValue pair = Undefined;
        var bounds = indices[i];
        if (bounds is not null)
        {
            pair = GetMatchIndexPair(engine, s, bounds);
        }

        result.Push(pair);

        // index 0 is the whole match and never carries a group name
        if (i == 0)
        {
            continue;
        }

        var name = groupNames?[i - 1];
        if (!string.IsNullOrWhiteSpace(name))
        {
            namedPairs!.CreateDataPropertyOrThrow(name!, pair);
        }
    }

    return result;
}

/// <summary>
/// https://tc39.es/ecma262/#sec-getmatchindexpair
/// Wraps the given index values of a capture in a new array instance.
/// </summary>
/// <param name="engine">Engine whose realm supplies the array intrinsics.</param>
/// <param name="s">The matched input string; not read by this method.</param>
/// <param name="match">The index values to copy into the resulting array.</param>
/// <returns>A new array containing the elements of <paramref name="match"/>.</returns>
private static JsValue GetMatchIndexPair(Engine engine, string s, JsNumber[] match)
    => engine.Realm.Intrinsics.Array.CreateArrayFromList(match);

private static string? GetRegexGroupName(Regex regex, int index)
{
if (index == 0)
Expand Down

0 comments on commit 9fbed07

Please sign in to comment.