Added support to disable kernel caches and automatic disposal of kernels and memory buffers. See #10.
m4rs-mt · Feb 28, 2019 · 616fdcc · 616fdcc
1 parent 15fb01e
commit 616fdcc
Show file tree

Hide file tree

Showing 5 changed files with 183 additions and 44 deletions.
diff --git a/Src/ILGPU/ContextFlags.cs b/Src/ILGPU/ContextFlags.cs
@@ -20,6 +20,7 @@ namespace ILGPU
     /// [ 0 -  7] = debugging settings
     /// [ 8 - 15] = code generation settings
     /// [16 - 23] = transformation settings
+    /// [24 - 31] = accelerator settings
     /// </remarks>
     [Flags]
     public enum ContextFlags : int
@@ -118,6 +119,54 @@ public enum ContextFlags : int
         /// (e.g. for debugging purposes).
         /// </summary>
         DisableConstantPropagation = 1 << 20,
+
+        // Accelerator settings
+
+        /// <summary>
+        /// Disables all kernel-loading caches.
+        /// </summary>
+        /// <remarks>
+        /// However, IR nodes, type information and debug information will still
+        /// be cached, since they are used for different kernel compilation operations.
+        /// </remarks>
+        DisableKernelCaching = 1 << 24,
+
+        /// <summary>
+        /// Disables automatic disposal of memory buffers in the scope of ILGPU GC threads.
+        /// It should only be used by experienced users.
+        /// </summary>
+        /// <remarks>
+        /// In theory, allocated memory buffers will be disposed automatically by the .NET GC.
+        /// However, disposing accelerator objects before their associated memory buffers have been
+        /// freed will result in exceptions and sometimes driver crashes on different systems.
+        /// If you disable automatic buffer disposal, you have to ensure that all accelerator
+        /// child objects have been freed manually before disposing the associated accelerator object.
+        /// </remarks>
+        DisableAutomaticBufferDisposal = 1 << 25,
+
+        /// <summary>
+        /// Disables automatic disposal of kernels in the scope of ILGPU GC threads.  This is dangerous as the
+        /// 'default' kernel-loading methods do not return <see cref="Runtime.Kernel"/> instances that can
+        /// be disposed manually.
+        /// It should only be used by experienced users.
+        /// </summary>
+        /// <remarks>
+        /// In theory, allocated accelerator kernels will be disposed automatically by the .NET GC.
+        /// However, disposing accelerator objects before their associated kernels have been
+        /// freed will result in exceptions and sometimes driver crashes on different systems.
+        /// If you disable automatic kernel disposal, you have to ensure that all accelerator
+        /// child objects have been freed manually before disposing the associated accelerator object.
+        /// </remarks>
+        DisableAutomaticKernelDisposal = 1 << 26,
+
+        /// <summary>
+        /// Disables kernel caching and automatic disposal of memory buffers and kernels.
+        /// It should only be used by experienced users.
+        /// </summary>
+        DisableAcceleratorGC =
+            DisableKernelCaching |
+            DisableAutomaticBufferDisposal |
+            DisableAutomaticKernelDisposal
     }
 
     /// <summary>

diff --git a/Src/ILGPU/Runtime/Accelerator.GC.cs b/Src/ILGPU/Runtime/Accelerator.GC.cs
@@ -20,10 +20,10 @@ partial class Accelerator
         #region Instance
 
         /// <summary>
-        /// True, iff the GC thread is enabled.
+        /// True while the GC thread is activated; set to false on disposal so that the GC thread leaves its loop.
         /// </summary>
         [DebuggerBrowsable(DebuggerBrowsableState.Never)]
-        private volatile bool gcEnabled = true;
+        private volatile bool gcActivated = false;
 
         /// <summary>
         /// The child-object GC thread
@@ -36,6 +36,10 @@ partial class Accelerator
         /// </summary>
         private void InitGC()
         {
+            if (Context.HasFlags(ContextFlags.DisableAcceleratorGC))
+                return;
+
+            gcActivated = true;
             gcThread = new Thread(GCThread)
             {
                 Name = "ILGPUAcceleratorGCThread",
@@ -48,16 +52,28 @@ private void InitGC()
         /// </summary>
         private void DisposeGC()
         {
+            if (!gcActivated)
+                return;
+
             lock (syncRoot)
             {
-                gcEnabled = false;
+                gcActivated = false;
                 Monitor.Pulse(syncRoot);
             }
             gcThread.Join();
         }
 
         #endregion
 
+        #region Properties
+
+        /// <summary>
+        /// Returns true if a GC thread has been created for this accelerator.
+        /// </summary>
+        private bool GCEnabled => gcThread != null;
+
+        #endregion
+
         #region Methods
 
         /// <summary>
@@ -78,7 +94,7 @@ private void GCThread()
         {
             lock (syncRoot)
             {
-                while (gcEnabled)
+                while (gcActivated)
                 {
                     Monitor.Wait(syncRoot);
 

diff --git a/Src/ILGPU/Runtime/Accelerator.cs b/Src/ILGPU/Runtime/Accelerator.cs
@@ -118,7 +118,14 @@ internal Accelerator(Context context, AcceleratorType type)
         {
             Context = context ?? throw new ArgumentNullException(nameof(context));
             AcceleratorType = type;
+
+            AutomaticBufferDisposalEnabled = !context.HasFlags(
+                ContextFlags.DisableAutomaticBufferDisposal);
+            AutomaticKernelDisposalEnabled = !context.HasFlags(
+                ContextFlags.DisableAutomaticKernelDisposal);
+            InitKernelCache();
             InitGC();
+
             memoryCache = new MemoryBufferCache(this);
         }
 
@@ -207,6 +214,18 @@ internal Accelerator(Context context, AcceleratorType type)
         /// </summary>
         public MemoryBufferCache MemoryCache => memoryCache;
 
+        /// <summary>
+        /// See <see cref="ContextFlags.DisableAutomaticBufferDisposal"/> for more information.
+        /// </summary>
+        [DebuggerBrowsable(DebuggerBrowsableState.Never)]
+        private bool AutomaticBufferDisposalEnabled { get; }
+
+        /// <summary>
+        /// See <see cref="ContextFlags.DisableAutomaticKernelDisposal"/> for more information.
+        /// </summary>
+        [DebuggerBrowsable(DebuggerBrowsableState.Never)]
+        private bool AutomaticKernelDisposalEnabled { get; }
+
         #endregion
 
         #region Methods

diff --git a/Src/ILGPU/Runtime/AcceleratorObject.cs b/Src/ILGPU/Runtime/AcceleratorObject.cs
@@ -57,7 +57,9 @@ protected AcceleratorObject()
         /// <param name="accelerator">The associated accelerator.</param>
         protected AcceleratorObject(Accelerator accelerator)
         {
-            Accelerator = accelerator ?? throw new ArgumentNullException(nameof(accelerator));
+            Debug.Assert(accelerator != null, "Invalid accelerator");
+
+            Accelerator = accelerator;
             AcceleratorType = accelerator.AcceleratorType;
             accelerator.RegisterChildObject(this);
         }
@@ -110,9 +112,13 @@ partial class Accelerator
         #region Properties
 
         /// <summary>
-        /// Returns the number of the associated child objects that depend
+        /// Returns the number of the registered child objects that depend
         /// on this accelerator object.
         /// </summary>
+        /// <remarks>
+        /// Note that memory buffers and kernels that are not registered due to the flags
+        /// <see cref="ContextFlags.DisableAutomaticBufferDisposal"/> and <see cref="ContextFlags.DisableAutomaticKernelDisposal"/> are not counted.
+        /// </remarks>
         public int NumberChildObjects
         {
             get
@@ -141,6 +147,11 @@ public int NumberChildObjects
         internal void RegisterChildObject<T>(T child)
             where T : AcceleratorObject
         {
+            if (!GCEnabled ||
+                !AutomaticBufferDisposalEnabled && child is MemoryBuffer ||
+                !AutomaticKernelDisposalEnabled && child is Kernel)
+                return;
+
             var objRef = new WeakReference<AcceleratorObject>(child);
             lock (syncRoot)
             {

diff --git a/Src/ILGPU/Runtime/KernelCache.cs b/Src/ILGPU/Runtime/KernelCache.cs
@@ -283,20 +283,35 @@ private interface IKernelLoader
         /// A cache for compiled kernel objects.
         /// </summary>
         [DebuggerBrowsable(DebuggerBrowsableState.Never)]
-        private Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>> compiledKernelCache =
-            new Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>>();
+        private Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>> compiledKernelCache;
 
         /// <summary>
         /// A cache for loaded kernel objects.
         /// </summary>
         [DebuggerBrowsable(DebuggerBrowsableState.Never)]
-        private Dictionary<CachedKernelKey, CachedKernel> kernelCache =
-            new Dictionary<CachedKernelKey, CachedKernel>();
+        private Dictionary<CachedKernelKey, CachedKernel> kernelCache;
+
+        /// <summary>
+        /// Initializes the local kernel cache.
+        /// </summary>
+        private void InitKernelCache()
+        {
+            if (Context.HasFlags(ContextFlags.DisableKernelCaching))
+                return;
+
+            compiledKernelCache = new Dictionary<CachedCompiledKernelKey, WeakReference<CompiledKernel>>();
+            kernelCache = new Dictionary<CachedKernelKey, CachedKernel>();
+        }
 
         #endregion
 
         #region Internal Properties
 
+        /// <summary>
+        /// Returns true if the kernel cache is enabled.
+        /// </summary>
+        private bool KernelCacheEnabled => kernelCache != null;
+
         /// <summary>
         /// True, iff a GC run is requested to clean disposed child kernels.
         /// </summary>
@@ -309,6 +324,25 @@ private interface IKernelLoader
 
         #region Methods
 
+        /// <summary>
+        /// Loads a kernel specified by the given method without using internal caches.
+        /// </summary>
+        /// <typeparam name="TKernelLoader">The type of the custom kernel loader.</typeparam>
+        /// <param name="method">The method to compile into a kernel.</param>
+        /// <param name="specialization">The kernel specialization.</param>
+        /// <param name="kernelLoader">The kernel loader.</param>
+        /// <returns>The loaded kernel.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private Kernel LoadGenericKernelDirect<TKernelLoader>(
+            MethodInfo method,
+            KernelSpecialization specialization,
+            ref TKernelLoader kernelLoader)
+            where TKernelLoader : struct, IKernelLoader
+        {
+            var compiledKernel = CompileKernel(method, specialization);
+            return kernelLoader.LoadKernel(this, compiledKernel);
+        }
+
         /// <summary>
         /// Loads a kernel specified by the given method.
         /// </summary>
@@ -326,39 +360,41 @@ private interface IKernelLoader
         {
             if (method == null)
                 throw new ArgumentNullException(nameof(method));
-            var cachedCompiledKernelKey = new CachedCompiledKernelKey(method, specialization);
-            var cachedKey = new CachedKernelKey(cachedCompiledKernelKey, kernelLoader.GroupSize);
-            lock (syncRoot)
+            if (KernelCacheEnabled)
             {
-                if (!kernelCache.TryGetValue(cachedKey, out CachedKernel cached) ||
-                    !cached.TryGetKernel(out Kernel result))
+                var cachedCompiledKernelKey = new CachedCompiledKernelKey(method, specialization);
+                var cachedKey = new CachedKernelKey(cachedCompiledKernelKey, kernelLoader.GroupSize);
+                lock (syncRoot)
                 {
-                    var compiledKernel = CompileKernel(method, specialization);
-                    result = kernelLoader.LoadKernel(this, compiledKernel);
-                    kernelCache[cachedKey] = new CachedKernel(
-                        cached.UpdateReference(result),
-                        kernelLoader.GroupSize,
-                        kernelLoader.MinGridSize);
-                }
-                else
-                {
-                    kernelLoader.MinGridSize = cached.MinGridSize;
-                    kernelLoader.GroupSize = cached.GroupSize;
+                    if (!kernelCache.TryGetValue(cachedKey, out CachedKernel cached) ||
+                        !cached.TryGetKernel(out Kernel result))
+                    {
+                        result = LoadGenericKernelDirect(method, specialization, ref kernelLoader);
+                        kernelCache[cachedKey] = new CachedKernel(
+                            cached.UpdateReference(result),
+                            kernelLoader.GroupSize,
+                            kernelLoader.MinGridSize);
+                    }
+                    else
+                    {
+                        kernelLoader.MinGridSize = cached.MinGridSize;
+                        kernelLoader.GroupSize = cached.GroupSize;
+                    }
+                    RequestGC_SyncRoot();
+                    return result;
                 }
-                RequestGC_SyncRoot();
-                return result;
             }
+            else
+                return LoadGenericKernelDirect(method, specialization, ref kernelLoader);
         }
 
         /// <summary>
         /// Compiles the given method into a <see cref="CompiledKernel"/>.
         /// </summary>
         /// <param name="method">The method to compile into a <see cref="CompiledKernel"/>.</param>
         /// <returns>The compiled kernel.</returns>
-        public CompiledKernel CompileKernel(MethodInfo method)
-        {
-            return CompileKernel(method, KernelSpecialization.Empty);
-        }
+        public CompiledKernel CompileKernel(MethodInfo method) =>
+            CompileKernel(method, KernelSpecialization.Empty);
 
         /// <summary>
         /// Compiles the given method into a <see cref="CompiledKernel"/> using the given
@@ -376,22 +412,27 @@ public CompiledKernel CompileKernel(MethodInfo method, KernelSpecialization spec
             if (!specialization.IsCompatibleWith(this))
                 throw new NotSupportedException(RuntimeErrorMessages.NotSupportedKernelSpecialization);
 
-            // Check and update cache
-            var cachedKey = new CachedCompiledKernelKey(method, specialization);
-            lock (syncRoot)
+            if (KernelCacheEnabled)
             {
-                if (!compiledKernelCache.TryGetValue(cachedKey, out WeakReference<CompiledKernel> cached) ||
-                    !cached.TryGetTarget(out CompiledKernel result))
+                // Check and update cache
+                var cachedKey = new CachedCompiledKernelKey(method, specialization);
+                lock (syncRoot)
                 {
-                    result = Backend.Compile(method, specialization);
-                    if (cached == null)
-                        compiledKernelCache.Add(cachedKey, new WeakReference<CompiledKernel>(result));
-                    else
-                        cached.SetTarget(result);
+                    if (!compiledKernelCache.TryGetValue(cachedKey, out WeakReference<CompiledKernel> cached) ||
+                        !cached.TryGetTarget(out CompiledKernel result))
+                    {
+                        result = Backend.Compile(method, specialization);
+                        if (cached == null)
+                            compiledKernelCache.Add(cachedKey, new WeakReference<CompiledKernel>(result));
+                        else
+                            cached.SetTarget(result);
+                    }
+                    RequestGC_SyncRoot();
+                    return result;
                 }
-                RequestGC_SyncRoot();
-                return result;
             }
+            else
+                return Backend.Compile(method, specialization);
         }
 
         /// <summary>
@@ -400,6 +441,9 @@ public CompiledKernel CompileKernel(MethodInfo method, KernelSpecialization spec
         /// <remarks>This method is invoked in the scope of the locked <see cref="syncRoot"/> object.</remarks>
         private void KernelCacheGC_SyncRoot()
         {
+            if (!KernelCacheEnabled)
+                return;
+
             if (compiledKernelCache.Count >= MinNumberOfKernelsInGC)
             {
                 var oldCompiledKernels = compiledKernelCache;