Skip to content

Commit

Permalink
Added support to disable kernel caches and automatic disposal of kern…
Browse files Browse the repository at this point in the history
…els and memory buffers.

See #10.
  • Loading branch information
m4rs-mt committed Feb 28, 2019
1 parent 15fb01e commit 616fdcc
Show file tree
Hide file tree
Showing 5 changed files with 183 additions and 44 deletions.
49 changes: 49 additions & 0 deletions Src/ILGPU/ContextFlags.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace ILGPU
/// [ 0 - 7] = debugging settings
/// [ 8 - 15] = code generation settings
/// [16 - 23] = transformation settings
/// [24 - 31] = accelerator settings
/// </remarks>
[Flags]
public enum ContextFlags : int
Expand Down Expand Up @@ -118,6 +119,54 @@ public enum ContextFlags : int
/// (e.g. for debugging purposes).
/// </summary>
DisableConstantPropagation = 1 << 20,

// Accelerator settings

/// <summary>
/// Disables all kernel-loading caches.
/// </summary>
/// <remarks>
/// However, IR nodes, type information and debug information will still
/// be cached, since they are used for different kernel compilation operations.
/// </remarks>
DisableKernelCaching = 1 << 24,

/// <summary>
/// Disables automatic disposal of memory buffers in the scope of ILGPU GC threads.
/// It should only be used by experienced users.
/// </summary>
/// <remarks>
/// In theory, allocated memory buffers will be disposed automatically by the .NET GC.
/// However, disposing accelerator objects before their associated memory buffers have been
/// freed will end up in exceptions and sometimes driver crashes on different systems.
/// If you disable automatic buffer disposal, you have to ensure that all accelerator
/// child objects have been freed manually before disposing the associated accelerator object.
/// </remarks>
DisableAutomaticBufferDisposal = 1 << 25,

/// <summary>
/// Disables automatic disposal of kernels in the scope of ILGPU GC threads. This is dangerous as the
/// 'default' kernel-loading methods do not return <see cref="Runtime.Kernel"/> instances that can
/// be disposed manually.
/// It should only be used by experienced users.
/// </summary>
/// <remarks>
/// In theory, allocated accelerator kernels will be disposed automatically by the .NET GC.
/// However, disposing accelerator objects before their associated kernels have been
/// freed will end up in exceptions and sometimes driver crashes on different systems.
/// If you disable automatic kernel disposal, you have to ensure that all accelerator
/// child objects have been freed manually before disposing the associated accelerator object.
/// </remarks>
DisableAutomaticKernelDisposal = 1 << 26,

/// <summary>
/// Disables kernel caching and automatic disposal of memory buffers and kernels.
/// It should only be used by experienced users.
/// </summary>
DisableAcceleratorGC =
DisableKernelCaching |
DisableAutomaticBufferDisposal |
DisableAutomaticKernelDisposal
}

/// <summary>
Expand Down
24 changes: 20 additions & 4 deletions Src/ILGPU/Runtime/Accelerator.GC.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ partial class Accelerator
#region Instance

/// <summary>
/// True, iff the GC thread is enabled.
/// True, if the GC thread is activated.
/// </summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private volatile bool gcEnabled = true;
private volatile bool gcActivated = false;

/// <summary>
/// The child-object GC thread
Expand All @@ -36,6 +36,10 @@ partial class Accelerator
/// </summary>
private void InitGC()
{
if (Context.HasFlags(ContextFlags.DisableAcceleratorGC))
return;

gcActivated = true;
gcThread = new Thread(GCThread)
{
Name = "ILGPUAcceleratorGCThread",
Expand All @@ -48,16 +52,28 @@ private void InitGC()
/// </summary>
private void DisposeGC()
{
if (!gcActivated)
return;

lock (syncRoot)
{
gcEnabled = false;
gcActivated = false;
Monitor.Pulse(syncRoot);
}
gcThread.Join();
}

#endregion

#region Properties

/// <summary>
/// Indicates whether a GC thread instance is assigned to this accelerator,
/// i.e. whether <c>gcThread</c> is not null.
/// </summary>
private bool GCEnabled
{
    get { return gcThread != null; }
}

#endregion

#region Methods

/// <summary>
Expand All @@ -78,7 +94,7 @@ private void GCThread()
{
lock (syncRoot)
{
while (gcEnabled)
while (gcActivated)
{
Monitor.Wait(syncRoot);

Expand Down
19 changes: 19 additions & 0 deletions Src/ILGPU/Runtime/Accelerator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,14 @@ internal Accelerator(Context context, AcceleratorType type)
{
Context = context ?? throw new ArgumentNullException(nameof(context));
AcceleratorType = type;

AutomaticBufferDisposalEnabled = !context.HasFlags(
ContextFlags.DisableAutomaticBufferDisposal);
AutomaticKernelDisposalEnabled = !context.HasFlags(
ContextFlags.DisableAutomaticKernelDisposal);
InitKernelCache();
InitGC();

memoryCache = new MemoryBufferCache(this);
}

Expand Down Expand Up @@ -207,6 +214,18 @@ internal Accelerator(Context context, AcceleratorType type)
/// </summary>
public MemoryBufferCache MemoryCache => memoryCache;

/// <summary>
/// True if automatic disposal of memory buffers is enabled for this accelerator.
/// The value is the negation of the context flag
/// <see cref="ContextFlags.DisableAutomaticBufferDisposal"/>; see that flag for more information.
/// </summary>
/// <remarks>
/// When this property is false, memory buffers are not registered as child objects
/// of this accelerator.
/// </remarks>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private bool AutomaticBufferDisposalEnabled { get; }

/// <summary>
/// True if automatic disposal of kernels is enabled for this accelerator.
/// The value is the negation of the context flag
/// <see cref="ContextFlags.DisableAutomaticKernelDisposal"/>; see that flag for more information.
/// </summary>
/// <remarks>
/// When this property is false, kernels are not registered as child objects
/// of this accelerator.
/// </remarks>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private bool AutomaticKernelDisposalEnabled { get; }

#endregion

#region Methods
Expand Down
15 changes: 13 additions & 2 deletions Src/ILGPU/Runtime/AcceleratorObject.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ protected AcceleratorObject()
/// <param name="accelerator">The associated accelerator.</param>
protected AcceleratorObject(Accelerator accelerator)
{
Accelerator = accelerator ?? throw new ArgumentNullException(nameof(accelerator));
Debug.Assert(accelerator != null, "Invalid accelerator");

Accelerator = accelerator;
AcceleratorType = accelerator.AcceleratorType;
accelerator.RegisterChildObject(this);
}
Expand Down Expand Up @@ -110,9 +112,13 @@ partial class Accelerator
#region Properties

/// <summary>
/// Returns the number of the associated child objects that depend
/// Returns the number of the registered child objects that depend
/// on this accelerator object.
/// </summary>
/// <remarks>
/// Note that this number is affected by the flags
/// <see cref="ContextFlags.DisableAutomaticBufferDisposal"/> and <see cref="ContextFlags.DisableAutomaticKernelDisposal"/>.
/// </remarks>
public int NumberChildObjects
{
get
Expand Down Expand Up @@ -141,6 +147,11 @@ public int NumberChildObjects
internal void RegisterChildObject<T>(T child)
where T : AcceleratorObject
{
if (!GCEnabled ||
!AutomaticBufferDisposalEnabled && child is MemoryBuffer ||
!AutomaticKernelDisposalEnabled && child is Kernel)
return;

var objRef = new WeakReference<AcceleratorObject>(child);
lock (syncRoot)
{
Expand Down
120 changes: 82 additions & 38 deletions Src/ILGPU/Runtime/KernelCache.cs
Original file line number Diff line number Diff line change
Expand Up @@ -283,20 +283,35 @@ private interface IKernelLoader
/// A cache for compiled kernel objects.
/// </summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>> compiledKernelCache =
new Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>>();
private Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>> compiledKernelCache;

/// <summary>
/// A cache for loaded kernel objects.
/// </summary>
[DebuggerBrowsable(DebuggerBrowsableState.Never)]
private Dictionary<CachedKernelKey, CachedKernel> kernelCache =
new Dictionary<CachedKernelKey, CachedKernel>();
private Dictionary<CachedKernelKey, CachedKernel> kernelCache;

/// <summary>
/// Creates the compiled-kernel cache and the loaded-kernel cache of this accelerator.
/// Both caches are left unassigned when the context specifies
/// <see cref="ContextFlags.DisableKernelCaching"/>.
/// </summary>
private void InitKernelCache()
{
    bool cachingDisabled = Context.HasFlags(ContextFlags.DisableKernelCaching);
    if (!cachingDisabled)
    {
        // Keep this order: kernelCache doubles as the "cache enabled" indicator
        // and is therefore assigned last.
        compiledKernelCache = new Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>>();
        kernelCache = new Dictionary<CachedKernelKey, CachedKernel>();
    }
}

#endregion

#region Internal Properties

/// <summary>
/// Indicates whether kernel caching is active, i.e. whether the loaded-kernel
/// cache (<c>kernelCache</c>) has been created.
/// </summary>
private bool KernelCacheEnabled
{
    get { return kernelCache != null; }
}

/// <summary>
/// True, iff a GC run is requested to clean disposed child kernels.
/// </summary>
Expand All @@ -309,6 +324,25 @@ private interface IKernelLoader

#region Methods

/// <summary>
/// Compiles the given method and passes the compiled kernel to the kernel loader,
/// without consulting or updating the loaded-kernel cache (<c>kernelCache</c>).
/// </summary>
/// <remarks>
/// Compilation is delegated to <c>CompileKernel(method, specialization)</c>.
/// </remarks>
/// <typeparam name="TKernelLoader">The type of the custom kernel loader.</typeparam>
/// <param name="method">The method to compile into a kernel.</param>
/// <param name="specialization">The kernel specialization.</param>
/// <param name="kernelLoader">The kernel loader that creates the kernel instance.</param>
/// <returns>The kernel returned by the kernel loader.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private Kernel LoadGenericKernelDirect<TKernelLoader>(
    MethodInfo method,
    KernelSpecialization specialization,
    ref TKernelLoader kernelLoader)
    where TKernelLoader : struct, IKernelLoader =>
    kernelLoader.LoadKernel(this, CompileKernel(method, specialization));

/// <summary>
/// Loads a kernel specified by the given method.
/// </summary>
Expand All @@ -326,39 +360,41 @@ private interface IKernelLoader
{
if (method == null)
throw new ArgumentNullException(nameof(method));
var cachedCompiledKernelKey = new CachedCompiledKernelKey(method, specialization);
var cachedKey = new CachedKernelKey(cachedCompiledKernelKey, kernelLoader.GroupSize);
lock (syncRoot)
if (KernelCacheEnabled)
{
if (!kernelCache.TryGetValue(cachedKey, out CachedKernel cached) ||
!cached.TryGetKernel(out Kernel result))
var cachedCompiledKernelKey = new CachedCompiledKernelKey(method, specialization);
var cachedKey = new CachedKernelKey(cachedCompiledKernelKey, kernelLoader.GroupSize);
lock (syncRoot)
{
var compiledKernel = CompileKernel(method, specialization);
result = kernelLoader.LoadKernel(this, compiledKernel);
kernelCache[cachedKey] = new CachedKernel(
cached.UpdateReference(result),
kernelLoader.GroupSize,
kernelLoader.MinGridSize);
}
else
{
kernelLoader.MinGridSize = cached.MinGridSize;
kernelLoader.GroupSize = cached.GroupSize;
if (!kernelCache.TryGetValue(cachedKey, out CachedKernel cached) ||
!cached.TryGetKernel(out Kernel result))
{
result = LoadGenericKernelDirect(method, specialization, ref kernelLoader);
kernelCache[cachedKey] = new CachedKernel(
cached.UpdateReference(result),
kernelLoader.GroupSize,
kernelLoader.MinGridSize);
}
else
{
kernelLoader.MinGridSize = cached.MinGridSize;
kernelLoader.GroupSize = cached.GroupSize;
}
RequestGC_SyncRoot();
return result;
}
RequestGC_SyncRoot();
return result;
}
else
return LoadGenericKernelDirect(method, specialization, ref kernelLoader);
}

/// <summary>
/// Compiles the given method into a <see cref="CompiledKernel"/>.
/// </summary>
/// <param name="method">The method to compile into a <see cref="CompiledKernel"/>.</param>
/// <returns>The compiled kernel.</returns>
public CompiledKernel CompileKernel(MethodInfo method)
{
return CompileKernel(method, KernelSpecialization.Empty);
}
public CompiledKernel CompileKernel(MethodInfo method) =>
CompileKernel(method, KernelSpecialization.Empty);

/// <summary>
/// Compiles the given method into a <see cref="CompiledKernel"/> using the given
Expand All @@ -376,22 +412,27 @@ public CompiledKernel CompileKernel(MethodInfo method, KernelSpecialization spec
if (!specialization.IsCompatibleWith(this))
throw new NotSupportedException(RuntimeErrorMessages.NotSupportedKernelSpecialization);

// Check and update cache
var cachedKey = new CachedCompiledKernelKey(method, specialization);
lock (syncRoot)
if (KernelCacheEnabled)
{
if (!compiledKernelCache.TryGetValue(cachedKey, out WeakReference<CompiledKernel> cached) ||
!cached.TryGetTarget(out CompiledKernel result))
// Check and update cache
var cachedKey = new CachedCompiledKernelKey(method, specialization);
lock (syncRoot)
{
result = Backend.Compile(method, specialization);
if (cached == null)
compiledKernelCache.Add(cachedKey, new WeakReference<CompiledKernel>(result));
else
cached.SetTarget(result);
if (!compiledKernelCache.TryGetValue(cachedKey, out WeakReference<CompiledKernel> cached) ||
!cached.TryGetTarget(out CompiledKernel result))
{
result = Backend.Compile(method, specialization);
if (cached == null)
compiledKernelCache.Add(cachedKey, new WeakReference<CompiledKernel>(result));
else
cached.SetTarget(result);
}
RequestGC_SyncRoot();
return result;
}
RequestGC_SyncRoot();
return result;
}
else
return Backend.Compile(method, specialization);
}

/// <summary>
Expand All @@ -400,6 +441,9 @@ public CompiledKernel CompileKernel(MethodInfo method, KernelSpecialization spec
/// <remarks>This method is invoked in the scope of the locked <see cref="syncRoot"/> object.</remarks>
private void KernelCacheGC_SyncRoot()
{
if (!KernelCacheEnabled)
return;

if (compiledKernelCache.Count >= MinNumberOfKernelsInGC)
{
var oldCompiledKernels = compiledKernelCache;
Expand Down

0 comments on commit 616fdcc

Please sign in to comment.