Skip to content

Commit

Permalink
Optimize partitioned logs by removing not relevant rules (#2369)
Browse files Browse the repository at this point in the history
* Optimize partitioned logs by removing not relevant rules

* refactor & test case

* Fix PerRunPerTarget strategy scenario

* Fix dotnet format issue

Co-authored-by: Eddy Nakamura <eddynaka@gmail.com>
  • Loading branch information
yongyan-gh and eddynaka committed Jun 21, 2021
1 parent cfb4f9b commit 744db99
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 6 deletions.
4 changes: 1 addition & 3 deletions src/Sarif.WorkItems/SarifWorkItemFiler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ public virtual IReadOnlyList<SarifLog> SplitLogFile(SarifLog sarifLog)
this.FilingResult = FilingResult.None;
this.FiledWorkItems = new List<WorkItemModel>();

sarifLog = sarifLog ?? throw new ArgumentNullException(nameof(sarifLog));

OptionallyEmittedData optionallyEmittedData = this.FilingContext.DataToRemove;
if (optionallyEmittedData != OptionallyEmittedData.None)
{
Expand Down Expand Up @@ -257,7 +255,7 @@ public virtual IReadOnlyList<SarifLog> SplitLogFile(SarifLog sarifLog)
}

Logger.LogDebug("Begin splitting logs");
var partitioningVisitor = new PartitioningVisitor<string>(partitionFunction, deepClone: false);
var partitioningVisitor = new PartitioningVisitor<string>(partitionFunction, deepClone: false, remapRules: true);
partitioningVisitor.VisitSarifLog(sarifLog);

Logger.LogDebug("Begin retrieving split logs");
Expand Down
14 changes: 14 additions & 0 deletions src/Sarif/ExtensionMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,20 @@ public static IEnumerable<SarifLog> Split(this SarifLog sarifLog, SplittingStrat
partitionFunction = (result) => result.Run.GetHashCode().ToString();
break;
}
case SplittingStrategy.PerRunPerTarget:
{
foreach (Run run in sarifLog.Runs)
{
run.SetRunOnResults();
}
partitionFunction = (result) =>
{
string runHashCode = result.Run.GetHashCode().ToString();
string targetFile = result.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation?.Uri?.OriginalString;
return $"{runHashCode}:{targetFile}";
};
break;
}
default:
{
throw new NotImplementedException($"SplittingStrategy: {splittingStrategy}");
Expand Down
42 changes: 41 additions & 1 deletion src/Sarif/Visitors/PartitioningVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ public class PartitioningVisitor<T> : SarifRewritingVisitor where T : class, IEq
// the original log (if true) or from a shallow copy of the original log (if false).
private readonly bool deepClone;

// A flag presents if optimize rules for each emitted logs, by removing rules which are not referenced
// by existing results.
private readonly bool remapRules;

// The log file being partitioned.
private SarifLog originalLog;

Expand Down Expand Up @@ -103,10 +107,11 @@ public class PartitioningVisitor<T> : SarifRewritingVisitor where T : class, IEq
/// working set, but it is not safe to modify any of the resulting logs because this class
/// makes no guarantee about which objects are shared. The default is <c>false</c>.
/// </param>
public PartitioningVisitor(PartitionFunction<T> partitionFunction, bool deepClone)
public PartitioningVisitor(PartitionFunction<T> partitionFunction, bool deepClone, bool remapRules = true)
{
this.partitionFunction = partitionFunction;
this.deepClone = deepClone;
this.remapRules = remapRules;
}

/// <summary>
Expand Down Expand Up @@ -338,6 +343,11 @@ private SarifLog CreatePartitionLog(T partitionValue)
originalRun.Properties);
}

if (this.remapRules)
{
partitionRun.Tool.Driver.Rules = this.RemapResultsToRules(partitionRun, results);
}

partitionRun.Results = results;

// Construct a mapping from the indices in the original run to the indices
Expand Down Expand Up @@ -395,6 +405,36 @@ private SarifLog CreatePartitionLog(T partitionValue)
return partitionLog;
}

private IList<ReportingDescriptor> RemapResultsToRules(Run partitionRun, List<Result> results)
{
IList<ReportingDescriptor> rules = partitionRun.Tool.Driver.Rules;
if (rules == null || !rules.Any() || results == null || !results.Any())
{
return null;
}

var referencedRulesByIndex = new Dictionary<ReportingDescriptor, int>(new ReportingDescriptorEqualityComparer());
int index = 0;

foreach (Result result in results)
{
ReportingDescriptor currentRule = result.GetRule(partitionRun);
if (!referencedRulesByIndex.TryGetValue(currentRule, out int currentIndex))
{
currentIndex = index;
referencedRulesByIndex.Add(currentRule, index++);
}

// only update result RuleIndex if it has valid value
if (result.RuleIndex != -1)
{
result.RuleIndex = currentIndex;
}
}

return referencedRulesByIndex.OrderBy(dict => dict.Value).Select(kv => kv.Key).ToList();
}

// This class contains information accumulated from an individual run in the
// original log file that is used to create the runs in the partition log files.
private class PartitionRunInfo
Expand Down
88 changes: 88 additions & 0 deletions src/Test.UnitTests.Sarif/Core/SarifLogTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System;
using System.Collections.Generic;
using System.Linq;

using FluentAssertions;

Expand Down Expand Up @@ -82,6 +84,92 @@ public void SarifLog_SplitPerRun()
sarifLog.Split(SplittingStrategy.PerRun).Should().HaveCount(3);
}

[Fact]
public void SarifLog_SplitPerResult()
{
var random = new Random();
SarifLog sarifLog = RandomSarifLogGenerator.GenerateSarifLogWithRuns(random, 1);
IList<SarifLog> logs = sarifLog.Split(SplittingStrategy.PerResult).ToList();
logs.Count.Should().Be(5);
foreach (SarifLog log in logs)
{
// optimized partitioned log should only include rules referenced by its results
log.Runs.Count.Should().Be(1);
int ruleCount = log.Runs[0].Results.Select(r => r.RuleId).Distinct().Count();
ruleCount.Should().Be(log.Runs[0].Tool.Driver.Rules.Count);
}
}

[Fact]
public void SarifLog_SplitPerTarget()
{
var random = new Random();
SarifLog sarifLog = RandomSarifLogGenerator.GenerateSarifLogWithRuns(random, 1);
IList<SarifLog> logs = sarifLog.Split(SplittingStrategy.PerRunPerTarget).ToList();
logs.Count.Should().Be(
sarifLog.Runs[0].Results.Select(r => r.Locations[0].PhysicalLocation.ArtifactLocation.Uri).Distinct().Count());
foreach (SarifLog log in logs)
{
// optimized partitioned log should only include rules referenced by its results
log.Runs.Count.Should().Be(1);
int ruleCount = log.Runs[0].Results.Select(r => r.RuleId).Distinct().Count();
ruleCount.Should().Be(log.Runs[0].Tool.Driver.Rules.Count);

// verify result's RuleIndex reference to right rule
foreach (Result result in log.Runs[0].Results)
{
result.RuleId.Should().Be(
log.Runs[0].Tool.Driver.Rules.ElementAt(result.RuleIndex).Id);
}
}
}

[Fact]
public void SarifLog_SplitPerTarget_WithEmptyLocations()
{
var random = new Random();
SarifLog sarifLog = RandomSarifLogGenerator.GenerateSarifLogWithRuns(random, 1);

// set random result's location to empty
IList<Result> results = sarifLog.Runs.First().Results;
Result randomResult = results.ElementAt(random.Next(results.Count));
randomResult.Locations = null;
randomResult = results.ElementAt(random.Next(results.Count));
if (randomResult.Locations?.FirstOrDefault()?.PhysicalLocation != null)
{
randomResult.Locations.FirstOrDefault().PhysicalLocation = null;
}
randomResult = results.ElementAt(random.Next(results.Count));
if (randomResult.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation != null)
{
randomResult.Locations.FirstOrDefault().PhysicalLocation.ArtifactLocation = null;
}
randomResult = results.ElementAt(random.Next(results.Count));
if (randomResult.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation?.Uri != null)
{
randomResult.Locations.FirstOrDefault().PhysicalLocation.ArtifactLocation.Uri = null;
}

IList<SarifLog> logs = sarifLog.Split(SplittingStrategy.PerRunPerTarget).ToList();
logs.Count.Should().Be(
sarifLog.Runs[0].Results.Select(r => r.Locations?.FirstOrDefault()?.PhysicalLocation?.ArtifactLocation?.Uri).Distinct().Count());
foreach (SarifLog log in logs)
{
// optimized partitioned log should only include rules referenced by its results
log.Runs.Count.Should().Be(1);
int ruleCount = log.Runs[0].Results.Select(r => r.RuleId).Distinct().Count();
ruleCount.Should().Be(log.Runs[0].Tool.Driver.Rules.Count);

// verify result's RuleIndex reference to right rule
foreach (Result result in log.Runs[0].Results)
{
result.RuleId.Should().Be(
log.Runs[0].Tool.Driver.Rules.ElementAt(result.RuleIndex).Id);
}
}
}


private Run SerializeAndDeserialize(Run run)
{
return JsonConvert.DeserializeObject<Run>(JsonConvert.SerializeObject(run));
Expand Down
6 changes: 4 additions & 2 deletions src/Test.UnitTests.Sarif/RandomSarifLogGenerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,14 @@ public static IEnumerable<string> GenerateFakeFiles(string baseAddress, int coun
public static IList<Result> GenerateFakeResults(Random random, List<string> ruleIds, List<Uri> filePaths, int resultCount)
{
List<Result> results = new List<Result>();
int fileIndex = random.Next(filePaths.Count);
for (int i = 0; i < resultCount; i++)
{
int fileIndex = random.Next(filePaths.Count);
int ruleIndex = random.Next(ruleIds.Count);
results.Add(new Result()
{
RuleId = ruleIds[random.Next(ruleIds.Count)],
RuleId = ruleIds[ruleIndex],
RuleIndex = ruleIndex,
Locations = new Location[]
{
new Location
Expand Down

0 comments on commit 744db99

Please sign in to comment.