Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance and misc improvements #365

Merged
merged 21 commits into from
May 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion AppInspector.CLI/Writers/VerifyRulesTextWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public override void WriteResults(Result result, CLICommandOptions commandOption
WriteOnce.Result("Rule status");
foreach (RuleStatus ruleStatus in verifyRulesResult.RuleStatusList)
{
WriteOnce.General(string.Format("Ruleid: {0}, Rulename: {1}, Status: {2}", ruleStatus.RulesId, ruleStatus.RulesName, ruleStatus.Verified));
WriteOnce.Result(string.Format("Ruleid: {0}, Rulename: {1}, Status: {2}", ruleStatus.RulesId, ruleStatus.RulesName, ruleStatus.Verified));
}
}

Expand Down
2 changes: 1 addition & 1 deletion AppInspector/Commands/AnalyzeCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ private void ConfigSourcetoScan()
}
catch (Exception)
{
throw new OpException(MsgHelp.FormatString(MsgHelp.ID.CMD_INVALID_FILE_OR_DIR, _options.SourcePath));
throw;
}
}
else if (File.Exists(_options.SourcePath)) //not a directory but make one for single flow
Expand Down
33 changes: 23 additions & 10 deletions AppInspector/Commands/VerifyRulesCommand.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT License. See LICENSE.txt in the project root for license information.

using Microsoft.ApplicationInspector.RulesEngine;
using Microsoft.CST.OAT;
using Newtonsoft.Json;
using NLog;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Microsoft.ApplicationInspector.Commands
{
Expand All @@ -20,6 +23,7 @@ public class RuleStatus
public string? RulesId { get; set; }
public string? RulesName { get; set; }
public bool Verified { get; set; }
public IEnumerable<Violation> OatIssues { get; set; } = Array.Empty<Violation>();
}

public class VerifyRulesResult : Result
Expand Down Expand Up @@ -105,17 +109,12 @@ private void ConfigRules()
{
WriteOnce.SafeLog("VerifyRulesCommand::ConfigRules", LogLevel.Trace);

if (_options.VerifyDefaultRules && !Utils.CLIExecutionContext)
{
throw new OpException(MsgHelp.GetString(MsgHelp.ID.VERIFY_RULES_NO_CLI_DEFAULT));
}

if (!_options.VerifyDefaultRules && string.IsNullOrEmpty(_options.CustomRulesPath))
{
throw new OpException(MsgHelp.GetString(MsgHelp.ID.CMD_NORULES_SPECIFIED));
}

_rules_path = _options.VerifyDefaultRules ? Utils.GetPath(Utils.AppPath.defaultRulesSrc) : _options.CustomRulesPath;
_rules_path = _options.VerifyDefaultRules ? null : _options.CustomRulesPath;
}

#endregion configure
Expand All @@ -134,18 +133,32 @@ public VerifyRulesResult GetResult()

try
{
RulesVerifier verifier = new RulesVerifier(_rules_path, _options.Log);
RulesVerifier verifier = new RulesVerifier(null, _options.Log);
verifyRulesResult.ResultCode = VerifyRulesResult.ExitCode.Verified;
verifyRulesResult.RuleStatusList = verifier.Verify();
verifyRulesResult.ResultCode = verifier.IsVerified ? VerifyRulesResult.ExitCode.Verified : VerifyRulesResult.ExitCode.NotVerified;
var stati = new List<RuleStatus>();
var analyzer = new Analyzer();
analyzer.SetOperation(new WithinOperation(analyzer));
analyzer.SetOperation(new OATRegexWithIndexOperation(analyzer));
analyzer.SetOperation(new OATSubstringIndexOperation(analyzer));
foreach (var rule in Utils.GetDefaultRuleSet().GetOatRules())
{
stati.Add(new RuleStatus()
{
RulesId = rule.AppInspectorRule.Id,
RulesName = rule.Name,
Verified = verifier.Verify(rule.AppInspectorRule),
OatIssues = analyzer.EnumerateRuleIssues(rule)
});
}
verifyRulesResult.RuleStatusList = stati;
verifyRulesResult.ResultCode = stati.All(x => x.Verified && !x.OatIssues.Any()) ? VerifyRulesResult.ExitCode.Verified : VerifyRulesResult.ExitCode.NotVerified;
}
catch (OpException e)
{
WriteOnce.Error(e.Message);
//caught for CLI callers with final exit msg about checking log or throws for DLL callers
throw;
}

return verifyRulesResult;
}
}
Expand Down
53 changes: 47 additions & 6 deletions AppInspector/RulesVerifier.cs
Original file line number Diff line number Diff line change
Expand Up @@ -144,19 +144,60 @@ public bool CheckIntegrity(Rule rule)
//valid search pattern
foreach (SearchPattern searchPattern in rule.Patterns ?? new SearchPattern[] { })
{
try
if (searchPattern.PatternType == PatternType.RegexWord || searchPattern.PatternType == PatternType.RegexWord)
{
if (string.IsNullOrEmpty(searchPattern.Pattern))
try
{
if (string.IsNullOrEmpty(searchPattern.Pattern))
{
throw new ArgumentException();
}
_ = new Regex(searchPattern.Pattern);
}
catch (Exception e)
{
throw new ArgumentException();
_logger?.Error(MsgHelp.FormatString(MsgHelp.ID.VERIFY_RULES_REGEX_FAIL, rule.Id ?? "", searchPattern.Pattern ?? "", e.Message));
return false;
}
_ = new Regex(searchPattern.Pattern);
}
catch (Exception e)
}

foreach(var condition in rule.Conditions ?? Array.Empty<SearchCondition>())
{
if (condition.SearchIn is null)
{
_logger?.Error(MsgHelp.FormatString(MsgHelp.ID.VERIFY_RULES_REGEX_FAIL, rule.Id ?? "", searchPattern.Pattern ?? "", e.Message));
_logger?.Error("SearchIn is null in {0}",rule.Id);
return false;
}
if (condition.SearchIn.StartsWith("finding-region"))
{
var parSplits = condition.SearchIn.Split(new char[] { ')', '(' });
if (parSplits.Length == 3)
{
var splits = parSplits[1].Split(',');
if (splits.Length == 2)
{
if (int.TryParse(splits[0], out int int1) && int.TryParse(splits[1], out int int2))
{
if (int1 == 0 && int2 == 0)
{
_logger?.Error("At least one finding region specifier must be non 0. {0}", rule.Id);
return false;
}
}
}
else
{
_logger?.Error("Improperly specified finding region. {0}", rule.Id);
return false;
}
}
else
{
_logger?.Error("Improperly specified finding region. {0}", rule.Id);
return false;
}
}
}

if (rule.Tags?.Length == 0)
Expand Down
4 changes: 1 addition & 3 deletions AppInspector/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ public static string GetVersionString()

public static string GetVersion()
{
Assembly assembly = Assembly.GetExecutingAssembly();
FileVersionInfo fileVersionInfo = FileVersionInfo.GetVersionInfo(assembly.Location);
return fileVersionInfo.ProductVersion ?? string.Empty;
return (Assembly.GetExecutingAssembly().GetCustomAttributes(typeof(AssemblyInformationalVersionAttribute), false) as AssemblyInformationalVersionAttribute[])?[0].InformationalVersion ?? "Unknown";
}

public static bool CLIExecutionContext { get; set; }
Expand Down
4 changes: 2 additions & 2 deletions AppInspector/rules/default/data_handling/deserialization.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@
"severity": "critical",
"patterns": [
{
"pattern": "unserialize(",
"type": "string",
"pattern": "unserialize\\(",
"type": "regex",
"scopes": [
"code"
],
Expand Down
4 changes: 2 additions & 2 deletions AppInspector/rules/default/data_handling/xml_parsing.json
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@
"modifiers": [ "i" ],
"_comment": ""
},
"search_in": "finding-region(0,0)",
"search_in": "same-line",
"negate_finding": true,
"_comment": ""
}
Expand Down Expand Up @@ -203,7 +203,7 @@
"modifiers": [ "i" ],
"_comment": ""
},
"search_in": "finding-region(0,0)",
"search_in": "same-line",
"negate_finding": true,
"_comment": ""
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"conditions": [
{
"pattern": {
"pattern": "Uri\\.parse\\(\"https?:\/\/www\\.);",
"pattern": "Uri\\.parse\\(\"https?:\\/\\/www\\.\\);",
"type": "regex",
"scopes": [
"code"
Expand Down
4 changes: 2 additions & 2 deletions AppInspector/rules/default/webapp/storage.json
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@
],
"_comment": ""
},
"search_in": "finding-region-5,5",
"search_in": "finding-region(-5,5)",
"negate_finding": false,
"_comment": ""
}
Expand Down Expand Up @@ -235,7 +235,7 @@
"modifiers": [ "i" ],
"_comment": ""
},
"search_in": "finding-region-5,5",
"search_in": "finding-region(-5,5)",
"negate_finding": false,
"_comment": ""
}
Expand Down
5 changes: 3 additions & 2 deletions Benchmarks/AnalyzeBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace Benchmarks
public class AnalyzeBenchmark
{
// Manually put the file you want to benchmark. But don't put this in a path with "Test" in the name ;)
private const string path = "C:\\Users\\gstocco\\Downloads\\runtime-main\\runtime-main.zip\\runtime-main\\src\\coreclr\\binder";
private const string path = "C:\\Users\\gstocco\\Documents\\GitHub\\ApplicationInspector\\RulesEngine";

public AnalyzeBenchmark()
{
Expand All @@ -36,7 +36,8 @@ public void AnalyzeMultiThread()
{
SourcePath = path,
SingleThread = false,
IgnoreDefaultRules = false
IgnoreDefaultRules = false,
FilePathExclusions = "bin,obj"
});

AnalyzeResult analyzeResult = command.GetResult();
Expand Down
20 changes: 20 additions & 0 deletions RulesEngine/OatSubstringIndexClause.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright (C) Microsoft. All rights reserved. Licensed under the MIT License.

using Microsoft.CST.OAT;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is OatSubstringIIndex generic enough to move into OAT: https://github.com/microsoft/OAT/tree/main/OAT/Operations?, or did I get that wrong?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A few issues.

It's not much different than the contains operator that already exists and is much more broad in terms of the data types it can work on.

Secondly, built-in operations in OAT should not be for one specific data type (for example this just works for strings).

The main distinction is that we are capturing and returning the index of the match, a behavior that is not mirrored in any other OAT operation, and doesn't make contextual sense outside of simple string matches.

See the existing AI code and AI uses it's own regex instead of the OAT regex because OAT doesn't deal with Boundary objects, that's a AI concept.


namespace Microsoft.ApplicationInspector.RulesEngine
{
public class OATSubstringIndexClause : Clause
{
public OATSubstringIndexClause(PatternScope[] scopes, string? field = null, bool useWordBoundaries = false) : base(Operation.Custom, field)
{
Scopes = scopes;
CustomOperation = "SubstringIndex";
UseWordBoundaries = useWordBoundaries;
}

public PatternScope[] Scopes { get; }

public bool UseWordBoundaries {get;}
}
}
101 changes: 101 additions & 0 deletions RulesEngine/OatSubstringIndexOperation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
using Microsoft.CST.OAT;
using Microsoft.CST.OAT.Operations;
using Microsoft.CST.OAT.Utils;
using Serilog;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text.RegularExpressions;

namespace Microsoft.ApplicationInspector.RulesEngine
{
/// <summary>
/// The Custom Operation to enable identification of pattern index in result used by Application Inspector to report why a given
/// result was matched and to retrieve other pattern level meta-data
/// </summary>
public class OATSubstringIndexOperation : OatOperation
{
/// <summary>
/// Create an OatOperation given an analyzer
/// </summary>
/// <param name="analyzer">The analyzer context to work with</param>
public OATSubstringIndexOperation(Analyzer analyzer) : base(Operation.Custom, analyzer)
{
CustomOperation = "SubstringIndex";
OperationDelegate = SubstringIndexOperationDelegate;
ValidationDelegate = SubstringIndexValidationDelegate;
}

public IEnumerable<Violation> SubstringIndexValidationDelegate(CST.OAT.Rule rule, Clause clause)
{
if (clause.Data?.Count == null || clause.Data?.Count == 0)
{
yield return new Violation(string.Format(Strings.Get("Err_ClauseNoData"), rule.Name, clause.Label ?? rule.Clauses.IndexOf(clause).ToString(CultureInfo.InvariantCulture)), rule, clause);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

out of curiosity, how is yield helping here?

Copy link
Contributor Author

@gfs gfs May 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The api you implement is an enumerable so

  1. You can Fail fast when validating on the first error if you don't care what they all are
  2. The function doesn't have to keep a list of the found errors.

You must implement this method as an IEnumerable to inherit from OATOperation.

}
if (clause.DictData != null && clause.DictData?.Count > 0)
{
yield return new Violation(string.Format(Strings.Get("Err_ClauseDictDataUnexpected"), rule.Name, clause.Label ?? rule.Clauses.IndexOf(clause).ToString(CultureInfo.InvariantCulture), clause.Operation.ToString()), rule, clause);
}
}

/// <summary>
/// Returns results with pattern index and Boundary as a tuple to enable retrieval of Rule pattern level meta-data like Confidence and report the
/// pattern that was responsible for the match
/// </summary>
/// <param name="clause"></param>
/// <param name="state1"></param>
/// <param name="state2"></param>
/// <param name="captures"></param>
/// <returns></returns>
public OperationResult SubstringIndexOperationDelegate(Clause clause, object? state1, object? state2, IEnumerable<ClauseCapture>? captures)
{
var comparisonType = clause.Arguments.Contains("i") ? StringComparison.InvariantCultureIgnoreCase : StringComparison.InvariantCulture;
if (state1 is TextContainer tc && clause is OATSubstringIndexClause src)
{
if (clause.Data is List<string> stringList && stringList.Any())
{
var outmatches = new List<(int, Boundary)>();//tuple results i.e. pattern index and where

for (int i = 0; i < stringList.Count; i++)
{
var idx = tc.FullContent.IndexOf(stringList[i], comparisonType);
while (idx != -1)
{
bool skip = false;
if (src.UseWordBoundaries)
{
if (idx > 0 && char.IsLetterOrDigit(tc.FullContent[idx - 1]))
{
skip = true;
}
if (idx + stringList[i].Length < tc.FullContent.Length && char.IsLetterOrDigit(tc.FullContent[idx + stringList[i].Length]))
{
skip = true;
}
}
if (!skip)
{
Boundary newBoundary = new Boundary()
{
Length = stringList[i].Length,
Index = idx
};
if (tc.ScopeMatch(src.Scopes, newBoundary))
{
outmatches.Add((i, newBoundary));
}
}
idx = tc.FullContent.IndexOf(stringList[i], idx + stringList[i].Length, comparisonType);
}
}

var result = src.Invert ? outmatches.Count == 0 : outmatches.Count > 0;
return new OperationResult(result, result && src.Capture ? new TypedClauseCapture<List<(int, Boundary)>>(clause, outmatches, state1) : null);
}
}
return new OperationResult(false, null);
}
}
}