-
Notifications
You must be signed in to change notification settings - Fork 351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Performance and misc improvements #365
Changes from all commits
7a2be43
644aba0
68817ba
7b95b2d
ae196b8
c17e351
a788b1a
12167e9
7811877
cadf944
ce7f170
15dec50
95a7c56
245cfad
b0da714
17494c0
1e8cbcb
24d493a
b653794
a2d75c8
0e04c88
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
// Copyright (C) Microsoft. All rights reserved. Licensed under the MIT License. | ||
|
||
using Microsoft.CST.OAT; | ||
|
||
namespace Microsoft.ApplicationInspector.RulesEngine | ||
{ | ||
public class OATSubstringIndexClause : Clause | ||
{ | ||
public OATSubstringIndexClause(PatternScope[] scopes, string? field = null, bool useWordBoundaries = false) : base(Operation.Custom, field) | ||
{ | ||
Scopes = scopes; | ||
CustomOperation = "SubstringIndex"; | ||
UseWordBoundaries = useWordBoundaries; | ||
} | ||
|
||
public PatternScope[] Scopes { get; } | ||
|
||
public bool UseWordBoundaries {get;} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
using Microsoft.CST.OAT; | ||
using Microsoft.CST.OAT.Operations; | ||
using Microsoft.CST.OAT.Utils; | ||
using Serilog; | ||
using System; | ||
using System.Collections.Concurrent; | ||
using System.Collections.Generic; | ||
using System.Globalization; | ||
using System.Linq; | ||
using System.Text.RegularExpressions; | ||
|
||
namespace Microsoft.ApplicationInspector.RulesEngine | ||
{ | ||
/// <summary> | ||
/// The Custom Operation to enable identification of pattern index in result used by Application Inspector to report why a given | ||
/// result was matched and to retrieve other pattern level meta-data | ||
/// </summary> | ||
public class OATSubstringIndexOperation : OatOperation | ||
{ | ||
/// <summary> | ||
/// Create an OatOperation given an analyzer | ||
/// </summary> | ||
/// <param name="analyzer">The analyzer context to work with</param> | ||
public OATSubstringIndexOperation(Analyzer analyzer) : base(Operation.Custom, analyzer) | ||
{ | ||
CustomOperation = "SubstringIndex"; | ||
OperationDelegate = SubstringIndexOperationDelegate; | ||
ValidationDelegate = SubstringIndexValidationDelegate; | ||
} | ||
|
||
public IEnumerable<Violation> SubstringIndexValidationDelegate(CST.OAT.Rule rule, Clause clause) | ||
{ | ||
if (clause.Data?.Count == null || clause.Data?.Count == 0) | ||
{ | ||
yield return new Violation(string.Format(Strings.Get("Err_ClauseNoData"), rule.Name, clause.Label ?? rule.Clauses.IndexOf(clause).ToString(CultureInfo.InvariantCulture)), rule, clause); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Out of curiosity, how is `yield` helping here?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The API you implement returns an enumerable, so you must implement this method as an `IEnumerable` to inherit from `OatOperation`; `yield return` is the simplest way to produce one.
||
} | ||
if (clause.DictData != null && clause.DictData?.Count > 0) | ||
{ | ||
yield return new Violation(string.Format(Strings.Get("Err_ClauseDictDataUnexpected"), rule.Name, clause.Label ?? rule.Clauses.IndexOf(clause).ToString(CultureInfo.InvariantCulture), clause.Operation.ToString()), rule, clause); | ||
} | ||
} | ||
|
||
/// <summary> | ||
/// Returns results with pattern index and Boundary as a tuple to enable retrieval of Rule pattern level meta-data like Confidence and report the | ||
/// pattern that was responsible for the match | ||
/// </summary> | ||
/// <param name="clause"></param> | ||
/// <param name="state1"></param> | ||
/// <param name="state2"></param> | ||
/// <param name="captures"></param> | ||
/// <returns></returns> | ||
public OperationResult SubstringIndexOperationDelegate(Clause clause, object? state1, object? state2, IEnumerable<ClauseCapture>? captures) | ||
{ | ||
var comparisonType = clause.Arguments.Contains("i") ? StringComparison.InvariantCultureIgnoreCase : StringComparison.InvariantCulture; | ||
if (state1 is TextContainer tc && clause is OATSubstringIndexClause src) | ||
{ | ||
if (clause.Data is List<string> stringList && stringList.Any()) | ||
{ | ||
var outmatches = new List<(int, Boundary)>();//tuple results i.e. pattern index and where | ||
|
||
for (int i = 0; i < stringList.Count; i++) | ||
{ | ||
var idx = tc.FullContent.IndexOf(stringList[i], comparisonType); | ||
while (idx != -1) | ||
{ | ||
bool skip = false; | ||
if (src.UseWordBoundaries) | ||
{ | ||
if (idx > 0 && char.IsLetterOrDigit(tc.FullContent[idx - 1])) | ||
{ | ||
skip = true; | ||
} | ||
if (idx + stringList[i].Length < tc.FullContent.Length && char.IsLetterOrDigit(tc.FullContent[idx + stringList[i].Length])) | ||
{ | ||
skip = true; | ||
} | ||
} | ||
if (!skip) | ||
{ | ||
Boundary newBoundary = new Boundary() | ||
{ | ||
Length = stringList[i].Length, | ||
Index = idx | ||
}; | ||
if (tc.ScopeMatch(src.Scopes, newBoundary)) | ||
{ | ||
outmatches.Add((i, newBoundary)); | ||
} | ||
} | ||
idx = tc.FullContent.IndexOf(stringList[i], idx + stringList[i].Length, comparisonType); | ||
} | ||
} | ||
|
||
var result = src.Invert ? outmatches.Count == 0 : outmatches.Count > 0; | ||
return new OperationResult(result, result && src.Capture ? new TypedClauseCapture<List<(int, Boundary)>>(clause, outmatches, state1) : null); | ||
} | ||
} | ||
return new OperationResult(false, null); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is OATSubstringIndex generic enough to move into OAT (https://github.com/microsoft/OAT/tree/main/OAT/Operations), or did I get that wrong?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A few issues.
It's not much different than the contains operator that already exists and is much more broad in terms of the data types it can work on.
Secondly, built-in operations in OAT should not be for one specific data type (for example this just works for strings).
The main distinction is that we are capturing and returning the index of the match, a behavior that is not mirrored in any other OAT operation, and doesn't make contextual sense outside of simple string matches.
See the existing AI code: AI uses its own regex operation instead of the OAT regex because OAT doesn't deal with Boundary objects; that's an AI concept.