Skip to content

Commit

Permalink
Merge pull request #25 from microsoft/feature/RoslynImprovements
Browse files Browse the repository at this point in the history
Major Roslyn Improvements to Memory and Running Time
  • Loading branch information
TravisOnGit committed Aug 14, 2020
2 parents 3611106 + 8047f3f commit ff083e0
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 55 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"ChildSelector": [
{
"Label": "Tardigrade_Success",
"ShouldSelect": "C#|Session.GetLastActionResponse().Status == \"Success\"",
"ShouldSelect": "C#|(await Session.GetLastActionResponseAsync()).Status == \"Success\"",
"Child": "Tardigrade_Success"
},
{
Expand Down
37 changes: 34 additions & 3 deletions Forge.TreeWalker.UnitTests/test/ExecutorUnitTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
namespace Microsoft.Forge.TreeWalker.UnitTests
{
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Scripting;
using Microsoft.VisualStudio.TestTools.UnitTesting;

using Microsoft.Forge.ExternalTypes;
Expand Down Expand Up @@ -180,8 +182,10 @@ public void TestExecutor_Success_CompileExpressionWithForgeDefaultDependenciesBe
{
this.UserContext.Foo = "Foo";
List<Type> dependencies = new List<Type>();

// Tasks dependency needed by Forge by default.
dependencies.Add(typeof(Task));

// Reflection dependency needed by Forge for runtime compilation.
dependencies.Add(typeof(Type));

Expand Down Expand Up @@ -225,6 +229,7 @@ public void TestExecutor_Success_ChangingFunctionReturnType()
// Changing return type of GetCount
string expression = "UserContext.GetCount = new Func<string>(() => \"Test\")";
Assert.AreEqual(this.UserContext.GetCount(), 1);

// Since expected return type has been updated, this should pass
ex.Execute<Func<string>>(expression).GetAwaiter().GetResult();
Assert.AreEqual(this.UserContext.GetCount(), "Test");
Expand Down Expand Up @@ -301,12 +306,13 @@ public void TestExecutor_ScriptCacheContainsKey()
string expression = "UserContext.Foo == \"Bar\" && UserContext.GetTopic(\"TopicName\").ResourceType == \"Node\"";

// Test - confirm ExpressionExecutor script cache does not contain script before executing.
ExpressionExecutor ex = new ExpressionExecutor(session: null, userContext: this.UserContext);
Assert.IsFalse(ex.ScriptCacheContainsKey(expression));
ConcurrentDictionary<string, Script<object>> scriptCache = new ConcurrentDictionary<string, Script<object>>();
ExpressionExecutor ex = new ExpressionExecutor(session: null, userContext: this.UserContext, dependencies: null, scriptCache: scriptCache);
Assert.IsFalse(scriptCache.ContainsKey(expression));

// Test - confirm ExpressionExecutor script cache does contain script after executing.
Assert.IsTrue(ex.Execute<bool>(expression).GetAwaiter().GetResult());
Assert.IsTrue(ex.ScriptCacheContainsKey(expression));
Assert.IsTrue(scriptCache.ContainsKey(expression));
}

[TestMethod]
Expand Down Expand Up @@ -341,5 +347,30 @@ public void TestExecutor_TreeInputNull_Success()
Assert.IsTrue(ex.Execute<bool>(expression).GetAwaiter().GetResult(), "Expected ExpressionExecutor to successfully evaluate a true expression using TreeInput as null.");
}

[TestMethod]
public void TestExecutor_ParentScriptBehavior()
{
// Test - Initialize an ExpressionExecutor and confirm the parentScriptTask is not immediately completed.
// It should take 2~ seconds to compile and run the parentScript.
ConcurrentDictionary<string, Script<object>> scriptCache = new ConcurrentDictionary<string, Script<object>>();
ExpressionExecutor ex = new ExpressionExecutor(session: null, userContext: this.UserContext, dependencies: null, scriptCache: scriptCache);

if (ex.parentScriptTask.IsCompleted)
{
Assert.Fail("Do not expect parentScriptTask to be completed immediately after ExpressionExecutor is initialized.");
}

// Test - After ScriptCache is initialized with parentScript, subsuquent ExpressionExecutor initialization should happen quickly.
ex.parentScriptTask.Wait();
ex = new ExpressionExecutor(session: null, userContext: this.UserContext, dependencies: null, scriptCache: scriptCache);

if (!ex.parentScriptTask.IsCompleted)
{
Assert.Fail("Expect parentScriptTask to be completed on ExpressionExecutor initialize when using an already initialized scriptCache.");
}

// Test - Confirm ParentScriptCode is present in the ScriptCache after initialization.
Assert.IsTrue(scriptCache.ContainsKey(ExpressionExecutor.ParentScriptCode));
}
}
}
19 changes: 12 additions & 7 deletions Forge.TreeWalker.UnitTests/test/TreeWalkerUnitTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@
namespace Microsoft.Forge.TreeWalker.UnitTests
{
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis.Scripting;
using Microsoft.VisualStudio.TestTools.UnitTesting;

using Microsoft.Forge.DataContracts;
Expand All @@ -31,12 +33,13 @@ public class TreeWalkerUnitTests

private Guid sessionId;
private IForgeDictionary forgeState = new ForgeDictionary(new Dictionary<string, object>(), Guid.Empty, Guid.Empty);
private ForgeUserContext UserContext = new ForgeUserContext();
private ITreeWalkerCallbacks callbacks;
private CancellationToken token;
private TreeWalkerParameters parameters;
private TreeWalkerSession session;
private Dictionary<string, ForgeTree> forgeTrees = new Dictionary<string, ForgeTree>();
private ConcurrentDictionary<string, Script<object>> scriptCache = new ConcurrentDictionary<string, Script<object>>();


public void TestInitialize(string jsonSchema, string treeName = null)
{
Expand All @@ -53,11 +56,12 @@ public void TestInitialize(string jsonSchema, string treeName = null)
this.callbacks,
this.token)
{
UserContext = this.UserContext,
UserContext = new ForgeUserContext(),
ForgeActionsAssembly = typeof(CollectDiagnosticsAction).Assembly,
InitializeSubroutineTree = this.InitializeSubroutineTree,
TreeName = treeName,
Dependencies = new List<Type>() { typeof(FooEnum) }
Dependencies = new List<Type>() { typeof(FooEnum) },
ScriptCache = this.scriptCache
};

this.session = new TreeWalkerSession(this.parameters);
Expand All @@ -78,7 +82,7 @@ public void TestInitializeWithForgeTree(ForgeTree forgeTree, string treeName = n
this.callbacks,
this.token)
{
UserContext = this.UserContext,
UserContext = new ForgeUserContext(),
ForgeActionsAssembly = typeof(CollectDiagnosticsAction).Assembly,
InitializeSubroutineTree = this.InitializeSubroutineTree,
TreeName = treeName,
Expand Down Expand Up @@ -1122,14 +1126,15 @@ private TreeWalkerSession InitializeSubroutineTree(SubroutineInput subroutineInp
forgeTrees[subroutineInput.TreeName],
new ForgeDictionary(new Dictionary<string, object>(), parentParameters.RootSessionId, subroutineSessionId),
this.callbacks,
this.token)
parentParameters.Token)
{
UserContext = this.UserContext,
UserContext = parentParameters.UserContext,
ForgeActionsAssembly = typeof(CollectDiagnosticsAction).Assembly,
InitializeSubroutineTree = this.InitializeSubroutineTree,
RootSessionId = parentParameters.RootSessionId,
TreeName = subroutineInput.TreeName,
TreeInput = subroutineInput.TreeInput
TreeInput = subroutineInput.TreeInput,
ScriptCache = parentParameters.ScriptCache
};

return new TreeWalkerSession(subroutineParameters);
Expand Down
140 changes: 104 additions & 36 deletions Forge.TreeWalker/src/ExpressionExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@ namespace Microsoft.Forge.TreeWalker
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Reflection;
using System.Threading.Tasks;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp.Scripting;
using Microsoft.CodeAnalysis.Scripting;

Expand All @@ -23,19 +25,39 @@ namespace Microsoft.Forge.TreeWalker
public class ExpressionExecutor
{
/// <summary>
/// List of external type dependencies needed to compile expressions.
/// Code that Forge uses to prime the parentScript on initialization.
/// Evaluating any random code moves roughly 600ms from the first expression to initializing the parentScript.
/// The first expression still takes roughly 100ms when tested.
/// The code block is arbitrary, though I did find in testing that the first expression ran quicker when the logic was similar to the parentScriptCode.
/// </summary>
private List<Type> dependencies;
public static string ParentScriptCode = "(1+1).ToString()";

/// <summary>
/// Script cache used to cache and re-use compiled Roslyn scripts.
/// The parentScriptTask kicks off initializing the Roslyn parentScript asynchronously, allowing ExpressionExecutor to construct very quickly.
/// Initializing the Roslyn parentScript takes about 2 seconds. This time is saved if the application takes time to initialize before the first WalkTree call.
/// </summary>
private ConcurrentDictionary<string, Script<object>> scriptCache;
public Task parentScriptTask { get; private set; }

/// <summary>
/// The parentScript that, upon initialization, gets Created, RunAsync, and added to the ScriptCache.
/// All expressions that get Executed are continued from the parentScript, avoiding additional compiles.
///
/// Before improvement - each unique expression would cost 25MB, take 2 seconds on first execution, then 0ms on further executions.
/// After improvement - only the parentScript costs 25MB and takes 2 seconds. Each unique expression costs < 0.5MB, takes 15ms on first execution, then 0ms on further executions.
/// </summary>
private Script<object> parentScript;

/// <summary>
/// List of external type dependencies needed to compile expressions.
/// </summary>
private List<Type> dependencies;

/// <summary>
/// Roslyn script options.
/// The ScriptCache holds Roslyn Scripts that are created using parentScript.ContinueWith.
/// This saves on memory and time since these continued Scripts use the already compiled parentScript as a base.
/// The parentScript gets asynchronously compiled, ran, and cached on initialization.
/// </summary>
private ScriptOptions scriptOptions;
private ConcurrentDictionary<string, Script<object>> scriptCache;

/// <summary>
/// Global parameters passed to Roslyn scripts that can be referenced inside expressions.
Expand All @@ -59,6 +81,7 @@ public ExpressionExecutor(ITreeSession session, object userContext, List<Type> d
Session = session,
TreeInput = treeInput
};

this.scriptCache = scriptCache ?? new ConcurrentDictionary<string, Script<object>>();
this.Initialize();
}
Expand Down Expand Up @@ -103,53 +126,71 @@ public ExpressionExecutor(ITreeSession session, object userContext)
/// <returns>The T value of the evaluated code.</returns>
public async Task<T> Execute<T>(string expression)
{
var script = this.scriptCache.GetOrAdd(
await this.parentScriptTask;

Script<object> expressionScript = this.scriptCache.GetOrAdd(
expression,
(key) => CSharpScript.Create<object>(
string.Format("return {0};", expression),
this.scriptOptions,
typeof(CodeGenInputParams)));
(key) => this.parentScript.ContinueWith(string.Format("return {0};", expression)));

// Execute script and return the result.
// Parse Enum types explicitly since they cannot be casted directly.
var temp = (await script.RunAsync(this.parameters).ConfigureAwait(false)).ReturnValue;
return typeof(T).IsEnum ? (T)Enum.Parse(typeof(T), temp.ToString()) : (T)temp;
object result = (await expressionScript.RunAsync(this.parameters).ConfigureAwait(false)).ReturnValue;
return typeof(T).IsEnum ? (T)Enum.Parse(typeof(T), result.ToString()) : (T)result;
}

/// <summary>
/// Initializes Roslyn script options with all the required assemblies, references, and external dependencies.
/// </summary>
private void Initialize()
{
this.scriptOptions = ScriptOptions.Default;
if (this.scriptCache.TryGetValue(ParentScriptCode, out this.parentScript))
{
// The parentScript is already initialized.
this.parentScriptTask = Task.CompletedTask;
return;
}

// Add references to required assemblies.
Assembly mscorlib = typeof(object).Assembly;
Assembly cSharpAssembly = typeof(Microsoft.CSharp.RuntimeBinder.CSharpArgumentInfo).Assembly;
this.scriptOptions = this.scriptOptions.AddReferences(mscorlib, cSharpAssembly);
this.parentScriptTask = Task.Run(async () =>
{
ScriptOptions scriptOptions = ScriptOptions.Default.WithMetadataResolver(new MissingResolver());
// Add required namespaces.
this.scriptOptions = this.scriptOptions.AddImports("System");
this.scriptOptions = this.scriptOptions.AddImports("System.Threading.Tasks");
// Add references to required assemblies.
Assembly mscorlib = typeof(object).Assembly;
Assembly systemCore = typeof(System.Linq.Enumerable).Assembly;
Assembly cSharpAssembly = typeof(Microsoft.CSharp.RuntimeBinder.CSharpArgumentInfo).Assembly;
scriptOptions = scriptOptions.AddReferences(mscorlib, systemCore, cSharpAssembly);
string systemCoreAssemblyName = mscorlib.GetName().Name;
// Add required namespaces.
scriptOptions = scriptOptions.AddImports(
"System",
"System.Threading.Tasks");
// Add external dependencies.
if (this.dependencies != null)
{
foreach (Type type in this.dependencies)
{
string fullAssemblyName = type.Assembly.GetName().Name;
string systemCoreAssemblyName = mscorlib.GetName().Name;
// While adding the reference again is okay, we can not AddImports for systemCoreAssembly.
if (fullAssemblyName == systemCoreAssemblyName)
// Add external dependencies.
if (this.dependencies != null)
{
foreach (Type type in this.dependencies)
{
continue;
}
string fullAssemblyName = type.Assembly.GetName().Name;
// While adding the reference again is okay, we can not AddImports for systemCoreAssembly.
if (fullAssemblyName == systemCoreAssemblyName)
{
continue;
}
this.scriptOptions = this.scriptOptions.AddReferences(type.Assembly).AddImports(type.Namespace);
scriptOptions = scriptOptions.AddReferences(type.Assembly).AddImports(type.Namespace);
}
}
}
// Create the parentScript and add it to scriptCache. Execute the parentScript so Roslyn is primed to evaluate further expressions.
this.parentScript = this.scriptCache.GetOrAdd(
ParentScriptCode,
(key) => CSharpScript.Create<object>(ParentScriptCode, scriptOptions, typeof(CodeGenInputParams)));
await this.parentScript.RunAsync(this.parameters).ConfigureAwait(false);
});
}

/// <summary>
Expand All @@ -163,8 +204,10 @@ public bool ScriptCacheContainsKey(string expression)
}

/// <summary>
/// This class defines the global parameter that will be
/// passed into the Roslyn expression evaluator.
/// This class defines the global parameter that will be passed into the Roslyn expression evaluator.
///
/// TODO: When Creating a Roslyn Script, the entire Assembly that the passed in GlobalsType resides in gets loaded.
/// An optimization would be to move this CodeGenInputParams to its own project/assembly.
/// </summary>
public class CodeGenInputParams
{
Expand All @@ -185,5 +228,30 @@ public class CodeGenInputParams
/// </summary>
public dynamic TreeInput { get; set; }
}

/// <summary>
/// The MissingResolve class is used to reduce the memory consumption of Roslyn.
/// This is accomplished by not loading missing references.
/// The end result is that Creating/Compiling a Script takes 25MB instead of 75MB.
/// </summary>
private class MissingResolver : Microsoft.CodeAnalysis.MetadataReferenceResolver
{
public override bool Equals(object other)
{
throw new NotImplementedException();
}

public override int GetHashCode()
{
throw new NotImplementedException();
}

public override bool ResolveMissingAssemblies => false;

public override ImmutableArray<PortableExecutableReference> ResolveReference(string reference, string baseFilePath, MetadataReferenceProperties properties)
{
throw new NotImplementedException();
}
}
}
}
Loading

0 comments on commit ff083e0

Please sign in to comment.