Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance and misc improvements #365

Merged
merged 21 commits into from
May 12, 2021
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion AppInspector/Commands/AnalyzeCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ private void ConfigSourcetoScan()
}
catch (Exception)
{
throw new OpException(MsgHelp.FormatString(MsgHelp.ID.CMD_INVALID_FILE_OR_DIR, _options.SourcePath));
throw;
}
}
else if (File.Exists(_options.SourcePath)) //not a directory but make one for single flow
Expand Down
4 changes: 1 addition & 3 deletions AppInspector/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ public static string GetVersionString()

public static string GetVersion()
{
Assembly assembly = Assembly.GetExecutingAssembly();
FileVersionInfo fileVersionInfo = FileVersionInfo.GetVersionInfo(assembly.Location);
return fileVersionInfo.ProductVersion ?? string.Empty;
return (Assembly.GetExecutingAssembly().GetCustomAttributes(typeof(AssemblyInformationalVersionAttribute), false) as AssemblyInformationalVersionAttribute[])?[0].InformationalVersion ?? "Unknown";
}

public static bool CLIExecutionContext { get; set; }
Expand Down
20 changes: 20 additions & 0 deletions RulesEngine/OatSubstringIndexClause.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (C) Microsoft. All rights reserved. Licensed under the MIT License.

using Microsoft.CST.OAT;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is OatSubstringIIndex generic enough to move into OAT: https://github.com/microsoft/OAT/tree/main/OAT/Operations?, or did I get that wrong?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few issues.

It's not much different than the contains operator that already exists and is much more broad in terms of the data types it can work on.

Secondly, built-in operations in OAT should not be for one specific data type (for example this just works for strings).

The main distinction is that we are capturing and returning the index of the match, a behavior that is not mirrored in any other OAT operation, and doesn't make contextual sense outside of simple string matches.

See the existing AI code and AI uses it's own regex instead of the OAT regex because OAT doesn't deal with Boundary objects, that's a AI concept.


namespace Microsoft.ApplicationInspector.RulesEngine
{
public class OATSubstringIndexClause : Clause
{
public OATSubstringIndexClause(PatternScope[] scopes, string? field = null, bool useWordBoundaries = false) : base(Operation.Custom, field)
{
Scopes = scopes;
CustomOperation = "SubstringIndex";
UseWordBoundaries = useWordBoundaries;
}

public PatternScope[] Scopes { get; }

public bool UseWordBoundaries {get;}
}
}
101 changes: 101 additions & 0 deletions RulesEngine/OatSubstringIndexOperation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
using Microsoft.CST.OAT;
using Microsoft.CST.OAT.Operations;
using Microsoft.CST.OAT.Utils;
using Serilog;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;

namespace Microsoft.ApplicationInspector.RulesEngine
{
/// <summary>
/// The Custom Operation to enable identification of pattern index in result used by Application Inspector to report why a given
/// result was matched and to retrieve other pattern level meta-data
/// </summary>
public class OATSubstringIndexOperation : OatOperation
{
/// <summary>
/// Create an OatOperation given an analyzer
/// </summary>
/// <param name="analyzer">The analyzer context to work with</param>
public OATSubstringIndexOperation(Analyzer analyzer) : base(Operation.Custom, analyzer)
{
CustomOperation = "SubstringIndex";
OperationDelegate = SubstringIndexOperationDelegate;
ValidationDelegate = SubstringIndexValidationDelegate;
}

public IEnumerable<Violation> SubstringIndexValidationDelegate(CST.OAT.Rule rule, Clause clause)
{
if (clause.Data?.Count == null || clause.Data?.Count == 0)
{
yield return new Violation(string.Format(Strings.Get("Err_ClauseNoData"), rule.Name, clause.Label ?? rule.Clauses.IndexOf(clause).ToString(CultureInfo.InvariantCulture)), rule, clause);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

out of curiosity, how is yield helping here?

Copy link
Contributor Author

@gfs gfs May 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The api you implement is an enumerable so

  1. You can Fail fast when validating on the first error if you don't care what they all are
  2. The function doesn't have to keep a list of the found errors.

You must implement this method as an IEnumerable to inherit from OATOperation.

}
if (clause.DictData != null && clause.DictData?.Count > 0)
{
yield return new Violation(string.Format(Strings.Get("Err_ClauseDictDataUnexpected"), rule.Name, clause.Label ?? rule.Clauses.IndexOf(clause).ToString(CultureInfo.InvariantCulture), clause.Operation.ToString()), rule, clause);
}
}

/// <summary>
/// Returns results with pattern index and Boundary as a tuple to enable retrieval of Rule pattern level meta-data like Confidence and report the
/// pattern that was responsible for the match
/// </summary>
/// <param name="clause"></param>
/// <param name="state1"></param>
/// <param name="state2"></param>
/// <param name="captures"></param>
/// <returns></returns>
public OperationResult SubstringIndexOperationDelegate(Clause clause, object? state1, object? state2, IEnumerable<ClauseCapture>? captures)
{
if (state1 is TextContainer tc && clause is OATSubstringIndexClause src)
{
if (clause.Data is List<string> stringList && stringList.Any())
{
var outmatches = new List<(int, Boundary)>();//tuple results i.e. pattern index and where

for (int i = 0; i < stringList.Count; i++)
{
var idx = tc.FullContent.IndexOf(stringList[i]);
while (idx != -1)
{
bool skip = false;
if (src.UseWordBoundaries)
{
if (idx > 0 && char.IsLetterOrDigit(tc.FullContent[idx - 1]))
{
skip = true;
}
if (idx + stringList[i].Length < tc.FullContent.Length && char.IsLetterOrDigit(tc.FullContent[idx + stringList[i].Length]))
{
skip = true;
}
}
if (!skip)
{
Boundary newBoundary = new Boundary()
{
Length = stringList[i].Length,
Index = idx
};
if (tc.ScopeMatch(src.Scopes, newBoundary))
{
outmatches.Add((i, newBoundary));
}
}

idx = tc.FullContent.IndexOf(stringList[i], idx + stringList[i].Length);
}
}

var result = src.Invert ? outmatches.Count == 0 : outmatches.Count > 0;
return new OperationResult(result, result && src.Capture ? new TypedClauseCapture<List<(int, Boundary)>>(clause, outmatches, state1) : null);
}
}
return new OperationResult(false, null);
}
}
}
33 changes: 13 additions & 20 deletions RulesEngine/RuleProcessor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ public RuleProcessor(RuleSet rules, RuleProcessorOptions opts)

analyzer = new Analyzer();
analyzer.SetOperation(new WithinOperation(analyzer));
analyzer.SetOperation(new OATRegexWithIndexOperation(analyzer)); //CHECK with OAT team as delegate doesn't appear to fire; ALT working fine in Analyze method anyway
analyzer.SetOperation(new OATRegexWithIndexOperation(analyzer));
analyzer.SetOperation(new OATSubstringIndexOperation(analyzer));
}


Expand Down Expand Up @@ -183,9 +184,11 @@ List<MatchRecord> ProcessBoundary(ClauseCapture cap)
LanguageInfo = languageInfo,
Boundary = boundary,
StartLocationLine = StartLocation.Line,
StartLocationColumn = StartLocation.Column,
EndLocationLine = EndLocation.Line != 0 ? EndLocation.Line : StartLocation.Line + 1, //match is on last line
EndLocationColumn = EndLocation.Column,
MatchingPattern = oatRule.AppInspectorRule.Patterns[patternIndex],
Excerpt = numLinesContext > -1 ? ExtractExcerpt(textContainer, StartLocation.Line, numLinesContext) : string.Empty,
Excerpt = numLinesContext > 0 ? ExtractExcerpt(textContainer, StartLocation.Line, numLinesContext) : string.Empty,
Sample = numLinesContext > -1 ? ExtractTextSample(textContainer.FullContent, boundary.Index, boundary.Length) : string.Empty
};

Expand Down Expand Up @@ -236,7 +239,7 @@ public List<MatchRecord> AnalyzeFile(FileEntry fileEntry, LanguageInfo languageI
return AnalyzeFile(sr.ReadToEnd(), fileEntry, languageInfo, tagsToIgnore, numLinesContext);
}

public async Task<List<MatchRecord>> AnalyzeFileAsync(FileEntry fileEntry, LanguageInfo languageInfo, CancellationToken cancellationToken, IEnumerable<string>? tagsToIgnore = null)
public async Task<List<MatchRecord>> AnalyzeFileAsync(FileEntry fileEntry, LanguageInfo languageInfo, CancellationToken cancellationToken, IEnumerable<string>? tagsToIgnore = null, int numLinesContext = 3)
{
var rulesByLanguage = GetRulesByLanguage(languageInfo.Name).Where(x => !x.AppInspectorRule.Disabled && SeverityLevel.HasFlag(x.AppInspectorRule.Severity));
var rules = rulesByLanguage.Union(GetRulesByFileName(fileEntry.FullPath).Where(x => !x.AppInspectorRule.Disabled && SeverityLevel.HasFlag(x.AppInspectorRule.Severity)));
Expand Down Expand Up @@ -307,8 +310,8 @@ List<MatchRecord> ProcessBoundary(ClauseCapture cap)
StartLocationLine = StartLocation.Line,
EndLocationLine = EndLocation.Line != 0 ? EndLocation.Line : StartLocation.Line + 1, //match is on last line
MatchingPattern = oatRule.AppInspectorRule.Patterns[patternIndex],
Excerpt = ExtractExcerpt(textContainer, StartLocation.Line),
Sample = ExtractTextSample(textContainer.FullContent, boundary.Index, boundary.Length)
Excerpt = numLinesContext > 0 ? ExtractExcerpt(textContainer, StartLocation.Line, numLinesContext) : string.Empty,
Sample = numLinesContext > -1 ? ExtractTextSample(textContainer.FullContent, boundary.Index, boundary.Length) : string.Empty
};

if (oatRule.AppInspectorRule.Tags?.Contains("Dependency.SourceInclude") ?? false)
Expand Down Expand Up @@ -447,23 +450,13 @@ private IEnumerable<ConvertedOatRule> GetRulesByFileName(string input)
/// </summary>
private string ExtractTextSample(string fileText, int index, int length)
{
string result = "";
try
{
//some js file results may be too long for practical display
if (length > MAX_TEXT_SAMPLE_LENGTH)
{
length = MAX_TEXT_SAMPLE_LENGTH;
}
if (index < 0 || length < 0) { return fileText; }

result = fileText.Substring(index, length).Trim();
}
catch (Exception e)
{
_logger?.Error(e.Message + " in ExtractTextSample");
}
length = Math.Min(Math.Min(length, MAX_TEXT_SAMPLE_LENGTH), fileText.Length - index);

if (length == 0) { return string.Empty; }

return result;
return fileText[index..(index + length)].Trim();
}

/// <summary>
Expand Down
67 changes: 42 additions & 25 deletions RulesEngine/Ruleset.cs
Original file line number Diff line number Diff line change
Expand Up @@ -173,28 +173,55 @@ public IEnumerable<ConvertedOatRule> GetUniversalRules()
{
var scopes = pattern.Scopes ?? new PatternScope[] { PatternScope.All };
var modifiers = pattern.Modifiers ?? Array.Empty<string>();
if (clauses.Where(x => x is OATRegexWithIndexClause src &&
src.Arguments.SequenceEqual(modifiers) && src.Scopes.SequenceEqual(scopes)) is IEnumerable<Clause> filteredClauses &&
filteredClauses.Any() && filteredClauses.First().Data is List<string> found)
if (pattern.PatternType == PatternType.String || pattern.PatternType == PatternType.Substring)
{
found.Add(pattern.Pattern);
if (clauses.Where(x => x is OATSubstringIndexClause src &&
src.Arguments.SequenceEqual(modifiers) && src.Scopes.SequenceEqual(scopes) && src.UseWordBoundaries) is IEnumerable<Clause> filteredClauses &&
filteredClauses.Any() && filteredClauses.First().Data is List<string> found)
{
found.Add(pattern.Pattern);
}
else
{
clauses.Add(new OATSubstringIndexClause(scopes, useWordBoundaries: pattern.PatternType == PatternType.String)
{
Label = clauseNumber.ToString(CultureInfo.InvariantCulture),//important to pattern index identification
Data = new List<string>() { pattern.Pattern },
Capture = true
});
if (clauseNumber > 0)
{
expression.Append(" OR ");
}
expression.Append(clauseNumber);
clauseNumber++;
}
}
else
{
clauses.Add(new OATRegexWithIndexClause(scopes)
if (clauses.Where(x => x is OATRegexWithIndexClause src &&
src.Arguments.SequenceEqual(modifiers) && src.Scopes.SequenceEqual(scopes)) is IEnumerable<Clause> filteredClauses &&
filteredClauses.Any() && filteredClauses.First().Data is List<string> found)
{
Label = clauseNumber.ToString(CultureInfo.InvariantCulture),//important to pattern index identification
Data = new List<string>() { pattern.Pattern },
Capture = true,
Arguments = pattern.Modifiers?.ToList() ?? new List<string>(),
CustomOperation = "RegexWithIndex"
});
if (clauseNumber > 0)
found.Add(pattern.Pattern);
}
else
{
expression.Append(" OR ");
clauses.Add(new OATRegexWithIndexClause(scopes)
{
Label = clauseNumber.ToString(CultureInfo.InvariantCulture),//important to pattern index identification
Data = new List<string>() { pattern.Pattern },
Capture = true,
Arguments = pattern.Modifiers?.ToList() ?? new List<string>(),
CustomOperation = "RegexWithIndex"
});
if (clauseNumber > 0)
{
expression.Append(" OR ");
}
expression.Append(clauseNumber);
clauseNumber++;
}
expression.Append(clauseNumber);
clauseNumber++;
}
}
}
Expand Down Expand Up @@ -358,16 +385,6 @@ private static void SanitizePatternRegex(SearchPattern pattern)
pattern.PatternType = PatternType.Regex;
pattern.Pattern = string.Format(CultureInfo.InvariantCulture, @"\b{0}\b", pattern.Pattern);
}
else if (pattern.PatternType == PatternType.String)
{
pattern.PatternType = PatternType.Regex;
pattern.Pattern = string.Format(CultureInfo.InvariantCulture, @"\b{0}\b", Regex.Escape(pattern.Pattern ?? string.Empty));
}
else if (pattern.PatternType == PatternType.Substring)
{
pattern.PatternType = PatternType.Regex;
pattern.Pattern = string.Format(CultureInfo.InvariantCulture, @"{0}", Regex.Escape(pattern.Pattern ?? string.Empty));
}
}
}
}